From da28438cae9a271c5c232177f81dfb243de9b7fa Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 11 Nov 2004 11:18:10 +0000
Subject: [PATCH 001/113] NTFS: Use i_size_read() in
 fs/ntfs/attrib.c::ntfs_attr_set().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 1 +
 fs/ntfs/Makefile  | 2 +-
 fs/ntfs/attrib.c  | 6 +++++-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 1d2ad15f1533..9c4e78a8e4b1 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -31,6 +31,7 @@ ToDo/Notes:
 	  compiled without debug.  This avoids a possible denial of service
 	  attack.  Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
 	  out.
+	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 7b66381a0b0f..f8c97d41226b 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.22\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23-WIP\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1ff7f90a18b0..7d668466dcd7 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1127,6 +1127,10 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
  * byte offset @ofs inside the attribute with the constant byte @val.
  *
  * This function is effectively like memset() applied to an ntfs attribute.
+ * Note thie function actually only operates on the page cache pages belonging
+ * to the ntfs attribute and it marks them dirty after doing the memset().
+ * Thus it relies on the vm dirty page write code paths to cause the modified
+ * pages to be written to the mft record/disk.
  *
  * Return 0 on success and -errno on error.  An error code of -ESPIPE means
  * that @ofs + @cnt were outside the end of the attribute and no write was
@@ -1155,7 +1159,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
 	end = ofs + cnt;
 	end_ofs = end & ~PAGE_CACHE_MASK;
 	/* If the end is outside the inode size return -ESPIPE. */
-	if (unlikely(end > VFS_I(ni)->i_size)) {
+	if (unlikely(end > i_size_read(VFS_I(ni)))) {
 		ntfs_error(vol->sb, "Request exceeds end of attribute.");
 		return -ESPIPE;
 	}

From 66129f88c4cc719591f687e5c8c764fe9d3e437a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 11 Nov 2004 12:34:00 +0000
Subject: [PATCH 002/113] NTFS: Use i_size_read() in
 fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 25 ++++++++++++-------------
 fs/ntfs/logfile.c |  5 +++--
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9c4e78a8e4b1..b1a949401be3 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -2,20 +2,18 @@ ToDo/Notes:
 	- Find and fix bugs.
 	- Checkpoint or disable the user space journal ($UsnJrnl).
 	- In between ntfs_prepare/commit_write, need exclusion between
-	  simultaneous file extensions. Need perhaps an NInoResizeUnderway()
-	  flag which we can set in ntfs_prepare_write() and clear again in
-	  ntfs_commit_write(). Just have to be careful in readpage/writepage,
-	  as well as in truncate, that we play nice... We might need to have
-	  a data_size field in the ntfs_inode to store the real attribute
-	  length. Also need to be careful with initialized_size extention in
+	  simultaneous file extensions.  This is given to us by holding i_sem
+	  on the inode.  The only places in the kernel when a file is resized
+	  are prepare/commit write and truncate for both of which i_sem is
+	  held.  Just have to be careful in readpage/writepage and all other
+	  helpers not running under i_sem that we play nice...
+	  Also need to be careful with initialized_size extention in
 	  ntfs_prepare_write. Basically, just be _very_ careful in this code...
-	  OTOH, perhaps i_sem, which is held accross generic_file_write is
-	  sufficient for synchronisation here. We then just need to make sure
-	  ntfs_readpage/writepage/truncate interoperate properly with us.
-	  UPDATE: The above is all ok as it is due to i_sem held.  The only
-	  thing that needs to be checked is ntfs_writepage() which does not
-	  hold i_sem.  It cannot change i_size but it needs to cope with a
-	  concurrent i_size change.
+	  UPDATE: The only things that need to be checked are read/writepage
+	  which do not hold i_sem.  Note writepage cannot change i_size but it
+	  needs to cope with a concurrent i_size change, just like readpage.
+	  Also both need to cope with concurrent changes to the other sizes,
+	  i.e. initialized/allocated/compressed size, as well.
 	- Implement mft.c::sync_mft_mirror_umount().  We currently will just
 	  leave the volume dirty on umount if the final iput(vol->mft_ino)
 	  causes a write of any mirrored mft records due to the mft mirror
@@ -32,6 +30,7 @@ ToDo/Notes:
 	  attack.  Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
 	  out.
 	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
+	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 5e280abafab3..e680dd0cdb64 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -443,7 +443,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
 	/* An empty $LogFile must have been clean before it got emptied. */
 	if (NVolLogFileEmpty(vol))
 		goto is_empty;
-	size = log_vi->i_size;
+	size = i_size_read(log_vi);
 	/* Make sure the file doesn't exceed the maximum allowed size. */
 	if (size > MaxLogFileSize)
 		size = MaxLogFileSize;
@@ -689,7 +689,8 @@ BOOL ntfs_empty_logfile(struct inode *log_vi)
 	if (!NVolLogFileEmpty(vol)) {
 		int err;
 		
-		err = ntfs_attr_set(NTFS_I(log_vi), 0, log_vi->i_size, 0xff);
+		err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
+				0xff);
 		if (unlikely(err)) {
 			ntfs_error(vol->sb, "Failed to fill $LogFile with "
 					"0xff bytes (error code %i).", err);

From db30d160cd8dfe1e53435fd76f4189778f1c728e Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 11 Nov 2004 12:42:00 +0000
Subject: [PATCH 003/113] NTFS: Use i_size_read() once and then use the cached
 value in       fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  | 2 ++
 fs/ntfs/lcnalloc.c | 8 +++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index b1a949401be3..ff0d9b132df8 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -31,6 +31,8 @@ ToDo/Notes:
 	  out.
 	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
 	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
+	- Use i_size_read() once and then use the cached value in
+	  fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 23fd911078b1..5346596fa871 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -140,6 +140,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
 	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
 	s64 clusters;
+	loff_t i_size;
 	struct inode *lcnbmp_vi;
 	runlist_element *rl = NULL;
 	struct address_space *mapping;
@@ -249,6 +250,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 	clusters = count;
 	rlpos = rlsize = 0;
 	mapping = lcnbmp_vi->i_mapping;
+	i_size = i_size_read(lcnbmp_vi);
 	while (1) {
 		ntfs_debug("Start of outer while loop: done_zones 0x%x, "
 				"search_zone %i, pass %i, zone_start 0x%llx, "
@@ -263,7 +265,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 		last_read_pos = bmp_pos >> 3;
 		ntfs_debug("last_read_pos 0x%llx.",
 				(unsigned long long)last_read_pos);
-		if (last_read_pos > lcnbmp_vi->i_size) {
+		if (last_read_pos > i_size) {
 			ntfs_debug("End of attribute reached.  "
 					"Skipping to zone_pass_done.");
 			goto zone_pass_done;
@@ -287,8 +289,8 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 		buf_size = last_read_pos & ~PAGE_CACHE_MASK;
 		buf = page_address(page) + buf_size;
 		buf_size = PAGE_CACHE_SIZE - buf_size;
-		if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size))
-			buf_size = lcnbmp_vi->i_size - last_read_pos;
+		if (unlikely(last_read_pos + buf_size > i_size))
+			buf_size = i_size - last_read_pos;
 		buf_size <<= 3;
 		lcn = bmp_pos & 7;
 		bmp_pos &= ~7;

From d4b9ba7bf6f38cff55b5d95a0db7dd91311ce20a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 17 Nov 2004 15:45:08 +0000
Subject: [PATCH 004/113] NTFS: Use i_size_read() in
 fs/ntfs/file.c::ntfs_file_open().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 1 +
 fs/ntfs/file.c    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index ff0d9b132df8..6d2a99c13d30 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -33,6 +33,7 @@ ToDo/Notes:
 	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
 	- Use i_size_read() once and then use the cached value in
 	  fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().
+	- Use i_size_read() in fs/ntfs/file.c::ntfs_file_open().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db8713ea0d27..e0f530ce6b99 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -47,7 +47,7 @@
 static int ntfs_file_open(struct inode *vi, struct file *filp)
 {
 	if (sizeof(unsigned long) < 8) {
-		if (vi->i_size > MAX_LFS_FILESIZE)
+		if (i_size_read(vi) > MAX_LFS_FILESIZE)
 			return -EFBIG;
 	}
 	return generic_file_open(vi, filp);

From 367636772f094fd840d2d79e75257bcfaa28e70f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 18 Nov 2004 13:46:45 +0000
Subject: [PATCH 005/113] NTFS: - In fs/ntfs/compress.c, use i_size_read() at
 the start and then use the 	cached value everywhere.  Cache the
 initialized_size in the same way 	and protect the critical region where
 the two sizes are read using the 	new size_lock of the ntfs inode.      
 - Add the new size_lock to the ntfs_inode structure (fs/ntfs/inode.h) 	and
 initialize it (fs/ntfs/inode.c).

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  7 +++++++
 fs/ntfs/compress.c | 46 ++++++++++++++++++++++++++++------------------
 fs/ntfs/inode.c    |  1 +
 fs/ntfs/inode.h    |  1 +
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 6d2a99c13d30..97c4fe932046 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -34,6 +34,13 @@ ToDo/Notes:
 	- Use i_size_read() once and then use the cached value in
 	  fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().
 	- Use i_size_read() in fs/ntfs/file.c::ntfs_file_open().
+	- Add size_lock to the ntfs_inode structure.  This is an rw spinlock
+	  and it locks against access to the inode sizes.  Note, ->size_lock
+	  is also accessed from irq context so you must use the _irqsave and
+	  _irqrestore lock and unlock functions, respectively.
+	- Use i_size_read() in fs/ntfs/compress.c at the start of the read and
+	  use the cached value afterwards.  Cache the initialized_size in the
+	  same way and protect access to the two sizes using the size_lock.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ee5ae706f861..6d265cfd49aa 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -96,13 +96,14 @@ void free_compression_buffers(void)
 /**
  * zero_partial_compressed_page - zero out of bounds compressed page region
  */
-static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
+static void zero_partial_compressed_page(struct page *page,
+		const s64 initialized_size)
 {
 	u8 *kp = page_address(page);
 	unsigned int kp_ofs;
 
 	ntfs_debug("Zeroing page region outside initialized size.");
-	if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) {
+	if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
 		/*
 		 * FIXME: Using clear_page() will become wrong when we get
 		 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
@@ -110,7 +111,7 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
 		clear_page(kp);
 		return;
 	}
-	kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK;
+	kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
 	memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
 	return;
 }
@@ -118,12 +119,12 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
 /**
  * handle_bounds_compressed_page - test for&handle out of bounds compressed page
  */
-static inline void handle_bounds_compressed_page(ntfs_inode *ni,
-		struct page *page)
+static inline void handle_bounds_compressed_page(struct page *page,
+		const loff_t i_size, const s64 initialized_size)
 {
-	if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) &&
-			(ni->initialized_size < VFS_I(ni)->i_size))
-		zero_partial_compressed_page(ni, page);
+	if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
+			(initialized_size < i_size))
+		zero_partial_compressed_page(page, initialized_size);
 	return;
 }
 
@@ -138,6 +139,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
  * @xpage_done:		set to 1 if xpage was completed successfully (IN/OUT)
  * @cb_start:		compression block to decompress (IN)
  * @cb_size:		size of compression block @cb_start in bytes (IN)
+ * @i_size:		file size when we started the read (IN)
+ * @initialized_size:	initialized file size when we started the read (IN)
  *
  * The caller must have disabled preemption. ntfs_decompress() reenables it when
  * the critical section is finished.
@@ -165,7 +168,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
 static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
 		int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
 		const int xpage, char *xpage_done, u8 *const cb_start,
-		const u32 cb_size)
+		const u32 cb_size, const loff_t i_size,
+		const s64 initialized_size)
 {
 	/*
 	 * Pointers into the compressed data, i.e. the compression block (cb),
@@ -219,9 +223,6 @@ return_error:
 		spin_unlock(&ntfs_cb_lock);
 		/* Second stage: finalize completed pages. */
 		if (nr_completed_pages > 0) {
-			struct page *page = dest_pages[completed_pages[0]];
-			ntfs_inode *ni = NTFS_I(page->mapping->host);
-
 			for (i = 0; i < nr_completed_pages; i++) {
 				int di = completed_pages[i];
 
@@ -230,7 +231,8 @@ return_error:
 				 * If we are outside the initialized size, zero
 				 * the out of bounds page range.
 				 */
-				handle_bounds_compressed_page(ni, dp);
+				handle_bounds_compressed_page(dp, i_size,
+						initialized_size);
 				flush_dcache_page(dp);
 				kunmap(dp);
 				SetPageUptodate(dp);
@@ -478,12 +480,14 @@ return_overflow:
  */
 int ntfs_read_compressed_block(struct page *page)
 {
+	loff_t i_size;
+	s64 initialized_size;
 	struct address_space *mapping = page->mapping;
 	ntfs_inode *ni = NTFS_I(mapping->host);
 	ntfs_volume *vol = ni->vol;
 	struct super_block *sb = vol->sb;
 	runlist_element *rl;
-	unsigned long block_size = sb->s_blocksize;
+	unsigned long flags, block_size = sb->s_blocksize;
 	unsigned char block_size_bits = sb->s_blocksize_bits;
 	u8 *cb, *cb_pos, *cb_end;
 	struct buffer_head **bhs;
@@ -552,8 +556,12 @@ int ntfs_read_compressed_block(struct page *page)
 	 * The remaining pages need to be allocated and inserted into the page
 	 * cache, alignment guarantees keep all the below much simpler. (-8
 	 */
-	max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >>
-			PAGE_CACHE_SHIFT) - offset;
+	read_lock_irqsave(&ni->size_lock, flags);
+	i_size = i_size_read(VFS_I(ni));
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+			offset;
 	if (nr_pages < max_page)
 		max_page = nr_pages;
 	for (i = 0; i < max_page; i++, offset++) {
@@ -824,7 +832,8 @@ lock_retry_remap:
 				 * If we are outside the initialized size, zero
 				 * the out of bounds page range.
 				 */
-				handle_bounds_compressed_page(ni, page);
+				handle_bounds_compressed_page(page, i_size,
+						initialized_size);
 				flush_dcache_page(page);
 				kunmap(page);
 				SetPageUptodate(page);
@@ -847,7 +856,8 @@ lock_retry_remap:
 		ntfs_debug("Found compressed compression block.");
 		err = ntfs_decompress(pages, &cur_page, &cur_ofs,
 				cb_max_page, cb_max_ofs, xpage, &xpage_done,
-				cb_pos,	cb_size - (cb_pos - cb));
+				cb_pos,	cb_size - (cb_pos - cb), i_size,
+				initialized_size);
 		/*
 		 * We can sleep from now on, lock already dropped by
 		 * ntfs_decompress().
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 31840ba0b38c..a02d8d9f0439 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -376,6 +376,7 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
 void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 {
 	ntfs_debug("Entering.");
+	rwlock_init(&ni->size_lock);
 	ni->initialized_size = ni->allocated_size = 0;
 	ni->seq_no = 0;
 	atomic_set(&ni->count, 1);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 99580455f2ed..56b2d6ec33f7 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -44,6 +44,7 @@ typedef struct _ntfs_inode ntfs_inode;
  * fields already provided in the VFS inode.
  */
 struct _ntfs_inode {
+	rwlock_t size_lock;	/* Lock serializing access to inode sizes. */
 	s64 initialized_size;	/* Copy from the attribute record. */
 	s64 allocated_size;	/* Copy from the attribute record. */
 	unsigned long state;	/* NTFS specific flags describing this inode.

From 206f9f35b2348b7b966ff18a5564b8a3ca325ed5 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 18 Nov 2004 15:01:06 +0000
Subject: [PATCH 006/113] NTFS: In fs/ntfs/dir.c, use i_size_read() once and
 then the cached value       afterwards.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  2 ++
 fs/ntfs/dir.c     | 13 +++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 97c4fe932046..7bbbb9152fee 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -41,6 +41,8 @@ ToDo/Notes:
 	- Use i_size_read() in fs/ntfs/compress.c at the start of the read and
 	  use the cached value afterwards.  Cache the initialized_size in the
 	  same way and protect access to the two sizes using the size_lock.
+	- Use i_size_read() in fs/ntfs/dir.c once and then use the cached
+	  value afterwards.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 93577561cdbe..d261ac4596f5 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1101,7 +1101,7 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
 static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
-	loff_t fpos;
+	loff_t fpos, i_size;
 	struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode;
 	struct super_block *sb = vdir->i_sb;
 	ntfs_inode *ndir = NTFS_I(vdir);
@@ -1122,7 +1122,8 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			vdir->i_ino, fpos);
 	rc = err = 0;
 	/* Are we at end of dir yet? */
-	if (fpos >= vdir->i_size + vol->mft_record_size)
+	i_size = i_size_read(vdir);
+	if (fpos >= i_size + vol->mft_record_size)
 		goto done;
 	/* Emulate . and .. for all directories. */
 	if (!fpos) {
@@ -1264,7 +1265,7 @@ skip_index_root:
 	bmp_mapping = bmp_vi->i_mapping;
 	/* Get the starting bitmap bit position and sanity check it. */
 	bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
-	if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) {
+	if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
 		ntfs_error(sb, "Current index allocation position exceeds "
 				"index bitmap size.");
 		goto err_out;
@@ -1301,7 +1302,7 @@ find_next_index_buffer:
 			goto get_next_bmp_page;
 		}
 		/* If we have reached the end of the bitmap, we are done. */
-		if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size))
+		if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
 			goto unm_EOD;
 		ia_pos = (bmp_pos + cur_bmp_pos) <<
 				ndir->itype.index.block_size_bits;
@@ -1441,7 +1442,7 @@ unm_EOD:
 	ntfs_unmap_page(bmp_page);
 EOD:
 	/* We are finished, set fpos to EOD. */
-	fpos = vdir->i_size + vol->mft_record_size;
+	fpos = i_size + vol->mft_record_size;
 abort:
 	kfree(name);
 done:
@@ -1495,7 +1496,7 @@ err_out:
 static int ntfs_dir_open(struct inode *vi, struct file *filp)
 {
 	if (sizeof(unsigned long) < 8) {
-		if (vi->i_size > MAX_LFS_FILESIZE)
+		if (i_size_read(vi) > MAX_LFS_FILESIZE)
 			return -EFBIG;
 	}
 	return 0;

From 218357ff1b1b2f1bfdce89d608dbe33dd2f9f14b Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 18 Nov 2004 20:34:59 +0000
Subject: [PATCH 007/113] NTFS: - Use i_size_read() in fs/ntfs/super.c once and
 then use the cached 	value afterwards.  Cache the initialized_size in the
 same way and 	protect access to the two sizes using the size_lock.       -
 Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   4 ++
 fs/ntfs/super.c   | 102 ++++++++++++++++++++++++----------------------
 2 files changed, 58 insertions(+), 48 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 7bbbb9152fee..f4e087a83e78 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -43,6 +43,10 @@ ToDo/Notes:
 	  same way and protect access to the two sizes using the size_lock.
 	- Use i_size_read() in fs/ntfs/dir.c once and then use the cached
 	  value afterwards.
+	- Use i_size_read() in fs/ntfs/super.c once and then use the cached
+	  value afterwards.  Cache the initialized_size in the same way and
+	  protect access to the two sizes using the size_lock.
+	- Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 212a3d0f2073..0ad0c51e9eb9 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -990,12 +990,12 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
  */
 static BOOL check_mft_mirror(ntfs_volume *vol)
 {
-	unsigned long index;
 	struct super_block *sb = vol->sb;
 	ntfs_inode *mirr_ni;
 	struct page *mft_page, *mirr_page;
 	u8 *kmft, *kmirr;
 	runlist_element *rl, rl2[2];
+	pgoff_t index;
 	int mrecs_per_page, i;
 
 	ntfs_debug("Entering.");
@@ -1205,10 +1205,11 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
  */
 static BOOL load_and_init_attrdef(ntfs_volume *vol)
 {
+	loff_t i_size;
 	struct super_block *sb = vol->sb;
 	struct inode *ino;
 	struct page *page;
-	unsigned long index, max_index;
+	pgoff_t index, max_index;
 	unsigned int size;
 
 	ntfs_debug("Entering.");
@@ -1220,13 +1221,14 @@ static BOOL load_and_init_attrdef(ntfs_volume *vol)
 		goto failed;
 	}
 	/* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
-	if (!ino->i_size || ino->i_size > 0x7fffffff)
+	i_size = i_size_read(ino);
+	if (i_size <= 0 || i_size > 0x7fffffff)
 		goto iput_failed;
-	vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(ino->i_size);
+	vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(i_size);
 	if (!vol->attrdef)
 		goto iput_failed;
 	index = 0;
-	max_index = ino->i_size >> PAGE_CACHE_SHIFT;
+	max_index = i_size >> PAGE_CACHE_SHIFT;
 	size = PAGE_CACHE_SIZE;
 	while (index < max_index) {
 		/* Read the attrdef table and copy it into the linear buffer. */
@@ -1239,12 +1241,12 @@ read_partial_attrdef_page:
 		ntfs_unmap_page(page);
 	};
 	if (size == PAGE_CACHE_SIZE) {
-		size = ino->i_size & ~PAGE_CACHE_MASK;
+		size = i_size & ~PAGE_CACHE_MASK;
 		if (size)
 			goto read_partial_attrdef_page;
 	}
-	vol->attrdef_size = ino->i_size;
-	ntfs_debug("Read %llu bytes from $AttrDef.", ino->i_size);
+	vol->attrdef_size = i_size;
+	ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
 	iput(ino);
 	return TRUE;
 free_iput_failed:
@@ -1267,10 +1269,11 @@ failed:
  */
 static BOOL load_and_init_upcase(ntfs_volume *vol)
 {
+	loff_t i_size;
 	struct super_block *sb = vol->sb;
 	struct inode *ino;
 	struct page *page;
-	unsigned long index, max_index;
+	pgoff_t index, max_index;
 	unsigned int size;
 	int i, max;
 
@@ -1286,14 +1289,15 @@ static BOOL load_and_init_upcase(ntfs_volume *vol)
 	 * The upcase size must not be above 64k Unicode characters, must not
 	 * be zero and must be a multiple of sizeof(ntfschar).
 	 */
-	if (!ino->i_size || ino->i_size & (sizeof(ntfschar) - 1) ||
-			ino->i_size > 64ULL * 1024 * sizeof(ntfschar))
+	i_size = i_size_read(ino);
+	if (!i_size || i_size & (sizeof(ntfschar) - 1) ||
+			i_size > 64ULL * 1024 * sizeof(ntfschar))
 		goto iput_upcase_failed;
-	vol->upcase = (ntfschar*)ntfs_malloc_nofs(ino->i_size);
+	vol->upcase = (ntfschar*)ntfs_malloc_nofs(i_size);
 	if (!vol->upcase)
 		goto iput_upcase_failed;
 	index = 0;
-	max_index = ino->i_size >> PAGE_CACHE_SHIFT;
+	max_index = i_size >> PAGE_CACHE_SHIFT;
 	size = PAGE_CACHE_SIZE;
 	while (index < max_index) {
 		/* Read the upcase table and copy it into the linear buffer. */
@@ -1306,13 +1310,13 @@ read_partial_upcase_page:
 		ntfs_unmap_page(page);
 	};
 	if (size == PAGE_CACHE_SIZE) {
-		size = ino->i_size & ~PAGE_CACHE_MASK;
+		size = i_size & ~PAGE_CACHE_MASK;
 		if (size)
 			goto read_partial_upcase_page;
 	}
-	vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS;
+	vol->upcase_len = i_size >> UCHAR_T_SIZE_BITS;
 	ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
-			ino->i_size, 64 * 1024 * sizeof(ntfschar));
+			i_size, 64 * 1024 * sizeof(ntfschar));
 	iput(ino);
 	down(&ntfs_lock);
 	if (!default_upcase) {
@@ -1435,7 +1439,7 @@ static BOOL load_system_files(ntfs_volume *vol)
 			iput(vol->lcnbmp_ino);
 		goto bitmap_failed;
 	}
-	if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) {
+	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
 		iput(vol->lcnbmp_ino);
 bitmap_failed:
 		ntfs_error(sb, "Failed to load $Bitmap.");
@@ -1959,8 +1963,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 	struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
 	filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
 	struct page *page;
-	unsigned long index, max_index;
-	unsigned int max_size;
+	pgoff_t index, max_index;
 
 	ntfs_debug("Entering.");
 	/* Serialize accesses to the cluster bitmap. */
@@ -1972,11 +1975,10 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 	 */
 	max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >>
 			PAGE_CACHE_SHIFT;
-	/* Use multiples of 4 bytes. */
-	max_size = PAGE_CACHE_SIZE >> 2;
-	ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.",
-			max_index, max_size);
-	for (index = 0UL; index < max_index; index++) {
+	/* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
+	ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
+			max_index, PAGE_CACHE_SIZE / 4);
+	for (index = 0; index < max_index; index++) {
 		unsigned int i;
 		/*
 		 * Read the page from page cache, getting it from backing store
@@ -2008,7 +2010,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 		 * the result as all out of range bytes are set to zero by
 		 * ntfs_readpage().
 		 */
-	  	for (i = 0; i < max_size; i++)
+	  	for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
 			nr_free -= (s64)hweight32(kaddr[i]);
 		kunmap_atomic(kaddr, KM_USER0);
 		page_cache_release(page);
@@ -2031,6 +2033,8 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 /**
  * __get_nr_free_mft_records - return the number of free inodes on a volume
  * @vol:	ntfs volume for which to obtain free inode count
+ * @nr_free:	number of mft records in file system
+ * @max_index:	maximum number of pages containing set bits
  *
  * Calculate the number of free mft records (inodes) on the mounted NTFS
  * volume @vol. We actually calculate the number of mft records in use instead
@@ -2043,32 +2047,20 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
  *
  * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing.
  */
-static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
+static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
+		s64 nr_free, const pgoff_t max_index)
 {
-	s64 nr_free;
 	u32 *kaddr;
 	struct address_space *mapping = vol->mftbmp_ino->i_mapping;
 	filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
 	struct page *page;
-	unsigned long index, max_index;
-	unsigned int max_size;
+	pgoff_t index;
 
 	ntfs_debug("Entering.");
-	/* Number of mft records in file system (at this point in time). */
-	nr_free = vol->mft_ino->i_size >> vol->mft_record_size_bits;
-	/*
-	 * Convert the maximum number of set bits into bytes rounded up, then
-	 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
-	 * have one full and one partial page max_index = 2.
-	 */
-	max_index = ((((NTFS_I(vol->mft_ino)->initialized_size >>
-			vol->mft_record_size_bits) + 7) >> 3) +
-			PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	/* Use multiples of 4 bytes. */
-	max_size = PAGE_CACHE_SIZE >> 2;
+	/* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
 	ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
-			"0x%x.", max_index, max_size);
-	for (index = 0UL; index < max_index; index++) {
+			"0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
+	for (index = 0; index < max_index; index++) {
 		unsigned int i;
 		/*
 		 * Read the page from page cache, getting it from backing store
@@ -2100,7 +2092,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
 		 * the result as all out of range bytes are set to zero by
 		 * ntfs_readpage().
 		 */
-	  	for (i = 0; i < max_size; i++)
+	  	for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
 			nr_free -= (s64)hweight32(kaddr[i]);
 		kunmap_atomic(kaddr, KM_USER0);
 		page_cache_release(page);
@@ -2134,8 +2126,11 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
  */
 static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
 {
-	ntfs_volume *vol = NTFS_SB(sb);
 	s64 size;
+	ntfs_volume *vol = NTFS_SB(sb);
+	ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
+	pgoff_t max_index;
+	unsigned long flags;
 
 	ntfs_debug("Entering.");
 	/* Type of filesystem. */
@@ -2158,10 +2153,20 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
 	sfs->f_bavail = sfs->f_bfree = size;
 	/* Serialize accesses to the inode bitmap. */
 	down_read(&vol->mftbmp_lock);
+	read_lock_irqsave(&mft_ni->size_lock, flags);
+	size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
+	/*
+	 * Convert the maximum number of set bits into bytes rounded up, then
+	 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
+	 * have one full and one partial page max_index = 2.
+	 */
+	max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
+			+ 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	/* Number of inodes in file system (at this point in time). */
-	sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits;
+	sfs->f_files = size;
 	/* Free inodes in fs (based on current total count). */
-	sfs->f_ffree = __get_nr_free_mft_records(vol);
+	sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index);
 	up_read(&vol->mftbmp_lock);
 	/*
 	 * File system id. This is extremely *nix flavour dependent and even
@@ -2347,7 +2352,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	}
 
 	/* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
-	vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS;
+	vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
+			NTFS_BLOCK_SIZE_BITS;
 
 	/* Read the boot sector and return unlocked buffer head to it. */
 	if (!(bh = read_ntfs_boot_sector(sb, silent))) {

From f50f3ac51983025405a71b70b033cc6bcb0d1fc1 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Fri, 19 Nov 2004 22:16:00 +0000
Subject: [PATCH 008/113] NTFS: Use i_size_read() in fs/ntfs/inode.c once and
 then use the cached value       afterwards when reading the size of the
 bitmap inode.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  2 ++
 fs/ntfs/inode.c   | 25 ++++++++++++++-----------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index f4e087a83e78..445d421e961d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -47,6 +47,8 @@ ToDo/Notes:
 	  value afterwards.  Cache the initialized_size in the same way and
 	  protect access to the two sizes using the size_lock.
 	- Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
+	- Use i_size_read() in fs/ntfs/inode.c once and then use the cached
+	  value afterwards when reading the size of the bitmap inode.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index a02d8d9f0439..6c631dbe0f4c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -174,7 +174,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
 
 	vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode,
 			(set_t)ntfs_init_locked_inode, &na);
-	if (!vi)
+	if (unlikely(!vi))
 		return ERR_PTR(-ENOMEM);
 
 	err = 0;
@@ -188,7 +188,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
 	 * There is no point in keeping bad inodes around if the failure was
 	 * due to ENOMEM. We want to be able to retry again later.
 	 */
-	if (err == -ENOMEM) {
+	if (unlikely(err == -ENOMEM)) {
 		iput(vi);
 		vi = ERR_PTR(err);
 	}
@@ -235,7 +235,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
 
 	vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
 			(set_t)ntfs_init_locked_inode, &na);
-	if (!vi)
+	if (unlikely(!vi))
 		return ERR_PTR(-ENOMEM);
 
 	err = 0;
@@ -250,7 +250,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
 	 * simplifies things in that we never need to check for bad attribute
 	 * inodes elsewhere.
 	 */
-	if (err) {
+	if (unlikely(err)) {
 		iput(vi);
 		vi = ERR_PTR(err);
 	}
@@ -290,7 +290,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
 
 	vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
 			(set_t)ntfs_init_locked_inode, &na);
-	if (!vi)
+	if (unlikely(!vi))
 		return ERR_PTR(-ENOMEM);
 
 	err = 0;
@@ -305,7 +305,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
 	 * simplifies things in that we never need to check for bad index
 	 * inodes elsewhere.
 	 */
-	if (err) {
+	if (unlikely(err)) {
 		iput(vi);
 		vi = ERR_PTR(err);
 	}
@@ -742,6 +742,7 @@ skip_attr_list_load:
 	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
 	 */
 	if (S_ISDIR(vi->i_mode)) {
+		loff_t bvi_size;
 		struct inode *bvi;
 		ntfs_inode *bni;
 		INDEX_ROOT *ir;
@@ -959,11 +960,12 @@ skip_attr_list_load:
 			goto unm_err_out;
 		}
 		/* Consistency check bitmap size vs. index allocation size. */
-		if ((bvi->i_size << 3) < (vi->i_size >>
+		bvi_size = i_size_read(bvi);
+		if ((bvi_size << 3) < (vi->i_size >>
 				ni->itype.index.block_size_bits)) {
 			ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
 					"for index allocation (0x%llx).",
-					bvi->i_size << 3, vi->i_size);
+					bvi_size << 3, vi->i_size);
 			goto unm_err_out;
 		}
 skip_large_dir_stuff:
@@ -1430,6 +1432,7 @@ err_out:
  */
 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 {
+	loff_t bvi_size;
 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
 	ntfs_inode *ni, *base_ni, *bni;
 	struct inode *bvi;
@@ -1633,10 +1636,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 		goto iput_unm_err_out;
 	}
 	/* Consistency check bitmap size vs. index allocation size. */
-	if ((bvi->i_size << 3) < (vi->i_size >>
-			ni->itype.index.block_size_bits)) {
+	bvi_size = i_size_read(bvi);
+	if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
 		ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
-				"index allocation (0x%llx).", bvi->i_size << 3,
+				"index allocation (0x%llx).", bvi_size << 3,
 				vi->i_size);
 		goto iput_unm_err_out;
 	}

From 07a4e2da7dd3c9345f84b2552872f9d38c257451 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 12 Jan 2005 13:08:26 +0000
Subject: [PATCH 009/113] NTFS: Use i_size_{read,write}() in
 fs/ntfs/{aops.c,mft.c} and protect       access to the i_size and other size
 fields using the size_lock.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   2 +
 fs/ntfs/aops.c    |  56 ++++++++++++-----
 fs/ntfs/mft.c     | 157 +++++++++++++++++++++++++++++++++-------------
 3 files changed, 156 insertions(+), 59 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 445d421e961d..051c6818aad8 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -49,6 +49,8 @@ ToDo/Notes:
 	- Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
 	- Use i_size_read() in fs/ntfs/inode.c once and then use the cached
 	  value afterwards when reading the size of the bitmap inode.
+	- Use i_size_{read,write}() in fs/ntfs/{aops.c,mft.c} and protect
+	  access to the i_size and other size fields using the size_lock.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 45d56e41ed98..a53212793809 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -66,19 +66,22 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	ni = NTFS_I(page->mapping->host);
 
 	if (likely(uptodate)) {
-		s64 file_ofs;
+		s64 file_ofs, initialized_size;
 
 		set_buffer_uptodate(bh);
 
 		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
 				bh_offset(bh);
+		read_lock_irqsave(&ni->size_lock, flags);
+		initialized_size = ni->initialized_size;
+		read_unlock_irqrestore(&ni->size_lock, flags);
 		/* Check for the current buffer head overflowing. */
-		if (file_ofs + bh->b_size > ni->initialized_size) {
+		if (file_ofs + bh->b_size > initialized_size) {
 			char *addr;
 			int ofs = 0;
 
-			if (file_ofs < ni->initialized_size)
-				ofs = ni->initialized_size - file_ofs;
+			if (file_ofs < initialized_size)
+				ofs = initialized_size - file_ofs;
 			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
 			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
 			flush_dcache_page(page);
@@ -168,6 +171,7 @@ static int ntfs_read_block(struct page *page)
 	runlist_element *rl;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
 	sector_t iblock, lblock, zblock;
+	unsigned long flags;
 	unsigned int blocksize, vcn_ofs;
 	int i, nr;
 	unsigned char blocksize_bits;
@@ -190,8 +194,10 @@ static int ntfs_read_block(struct page *page)
 	}
 
 	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	read_lock_irqsave(&ni->size_lock, flags);
 	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
 	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+	read_unlock_irqrestore(&ni->size_lock, flags);
 
 	/* Loop through all the buffers in the page. */
 	rl = NULL;
@@ -463,12 +469,15 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
 {
 	VCN vcn;
 	LCN lcn;
+	s64 initialized_size;
+	loff_t i_size;
 	sector_t block, dblock, iblock;
 	struct inode *vi;
 	ntfs_inode *ni;
 	ntfs_volume *vol;
 	runlist_element *rl;
 	struct buffer_head *bh, *head;
+	unsigned long flags;
 	unsigned int blocksize, vcn_ofs;
 	int err;
 	BOOL need_end_writeback;
@@ -510,11 +519,16 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
 	/* The first block in the page. */
 	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 
+	read_lock_irqsave(&ni->size_lock, flags);
+	i_size = i_size_read(vi);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+
 	/* The first out of bounds block for the data size. */
-	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;
+	dblock = (i_size + blocksize - 1) >> blocksize_bits;
 
 	/* The last (fully or partially) initialized block. */
-	iblock = ni->initialized_size >> blocksize_bits;
+	iblock = initialized_size >> blocksize_bits;
 
 	/*
 	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -559,7 +573,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
 
 		/* Make sure we have enough initialized size. */
 		if (unlikely((block >= iblock) &&
-				(ni->initialized_size < vi->i_size))) {
+				(initialized_size < i_size))) {
 			/*
 			 * If this page is fully outside initialized size, zero
 			 * out all pages between the current initialized size
@@ -846,7 +860,7 @@ static int ntfs_write_mst_block(struct page *page,
 			(PAGE_CACHE_SHIFT - bh_size_bits);
 
 	/* The first out of bounds block for the data size. */
-	dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;
+	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
 
 	rl = NULL;
 	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
@@ -1367,7 +1381,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 	 */
 
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
-	i_size = i_size_read(VFS_I(ni));
+	i_size = i_size_read(vi);
 	kaddr = kmap_atomic(page, KM_USER0);
 	if (unlikely(attr_len > i_size)) {
 		/* Zero out of bounds area in the mft record. */
@@ -1425,12 +1439,15 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 {
 	VCN vcn;
 	LCN lcn;
+	s64 initialized_size;
+	loff_t i_size;
 	sector_t block, ablock, iblock;
 	struct inode *vi;
 	ntfs_inode *ni;
 	ntfs_volume *vol;
 	runlist_element *rl;
 	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+	unsigned long flags;
 	unsigned int vcn_ofs, block_start, block_end, blocksize;
 	int err;
 	BOOL is_retry;
@@ -1462,6 +1479,7 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 	/* The first block in the page. */
 	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 
+	read_lock_irqsave(&ni->size_lock, flags);
 	/*
 	 * The first out of bounds block for the allocated size. No need to
 	 * round up as allocated_size is in multiples of cluster size and the
@@ -1470,8 +1488,12 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 	 */
 	ablock = ni->allocated_size >> blocksize_bits;
 
+	i_size = i_size_read(vi);
+	initialized_size = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
+
 	/* The last (fully or partially) initialized block. */
-	iblock = ni->initialized_size >> blocksize_bits;
+	iblock = initialized_size >> blocksize_bits;
 
 	/* Loop through all the buffers in the page. */
 	block_start = 0;
@@ -1518,7 +1540,7 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 		 * request, i.e. block < ablock is true.
 		 */
 		if (unlikely((block >= iblock) &&
-				(ni->initialized_size < vi->i_size))) {
+				(initialized_size < i_size))) {
 			/*
 			 * If this page is fully outside initialized size, zero
 			 * out all pages between the current initialized size
@@ -1868,7 +1890,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	BUG_ON(page_has_buffers(page));
 	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
 	/* If we do not need to resize the attribute allocation we are done. */
-	if (new_size <= vi->i_size)
+	if (new_size <= i_size_read(vi))
 		goto done;
 
 	// FIXME: We abort for now as this code is not safe.
@@ -1904,7 +1926,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	a = ctx->attr;
 	/* The total length of the attribute value. */
 	attr_len = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(vi->i_size != attr_len);
+	BUG_ON(i_size_read(vi) != attr_len);
 	/* Check if new size is allowed in $AttrDef. */
 	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
 	if (unlikely(err)) {
@@ -2047,7 +2069,7 @@ static int ntfs_commit_nonresident_write(struct page *page,
 	 * now we know ntfs_prepare_write() would have failed in the write
 	 * exceeds i_size case, so this will never trigger which is fine.
 	 */
-	if (pos > vi->i_size) {
+	if (pos > i_size_read(vi)) {
 		ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
 				"not supported yet.  Sorry.");
 		return -EOPNOTSUPP;
@@ -2183,9 +2205,13 @@ static int ntfs_commit_write(struct file *file, struct page *page,
 	}
 	kunmap_atomic(kaddr, KM_USER0);
 	/* Update i_size if necessary. */
-	if (vi->i_size < attr_len) {
+	if (i_size_read(vi) < attr_len) {
+		unsigned long flags;
+
+		write_lock_irqsave(&ni->size_lock, flags);
 		ni->allocated_size = ni->initialized_size = attr_len;
 		i_size_write(vi, attr_len);
+		write_unlock_irqrestore(&ni->size_lock, flags);
 	}
 	/* Mark the mft record dirty, so it gets written back. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index dfa85ac2f8ba..20011e02f5b6 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -45,6 +45,7 @@
  */
 static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 {
+	loff_t i_size;
 	ntfs_volume *vol = ni->vol;
 	struct inode *mft_vi = vol->mft_ino;
 	struct page *page;
@@ -60,13 +61,14 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
 	index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
 	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
 
+	i_size = i_size_read(mft_vi);
 	/* The maximum valid index into the page cache for $MFT's data. */
-	end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size >> PAGE_CACHE_SHIFT;
 
 	/* If the wanted index is out of bounds the mft record doesn't exist. */
 	if (unlikely(index >= end_index)) {
-		if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) <
-				ofs + vol->mft_record_size) {
+		if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
+				vol->mft_record_size) {
 			page = ERR_PTR(-ENOENT);
 			ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, "
 					"which is beyond the end of the mft.  "
@@ -1121,6 +1123,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
 		ntfs_inode *base_ni)
 {
 	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
+	unsigned long flags;
 	struct address_space *mftbmp_mapping;
 	u8 *buf, *byte;
 	struct page *page;
@@ -1134,9 +1137,13 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
 	 * Set the end of the pass making sure we do not overflow the mft
 	 * bitmap.
 	 */
+	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
 	pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
 			vol->mft_record_size_bits;
+	read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
+	read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
 	ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
+	read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
 	if (pass_end > ll)
 		pass_end = ll;
 	pass = 1;
@@ -1263,6 +1270,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 {
 	LCN lcn;
 	s64 ll;
+	unsigned long flags;
 	struct page *page;
 	ntfs_inode *mft_ni, *mftbmp_ni;
 	runlist_element *rl, *rl2 = NULL;
@@ -1286,8 +1294,10 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	 * mft bitmap cannot be zero so we are ok to do this.
 	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
-	rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >>
-			vol->cluster_size_bits, TRUE);
+	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
+	ll = mftbmp_ni->allocated_size;
+	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+	rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft bitmap attribute.");
@@ -1458,9 +1468,11 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 		}
 		a = ctx->attr;
 	}
+	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	mftbmp_ni->allocated_size += vol->cluster_size;
 	a->data.non_resident.allocated_size =
 			cpu_to_sle64(mftbmp_ni->allocated_size);
+	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 	/* Ensure the changes make it to disk. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1476,7 +1488,9 @@ restore_undo_alloc:
 			0, ctx)) {
 		ntfs_error(vol->sb, "Failed to find last attribute extent of "
 				"mft bitmap attribute.%s", es);
+		write_lock_irqsave(&mftbmp_ni->size_lock, flags);
 		mftbmp_ni->allocated_size += vol->cluster_size;
+		write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 		ntfs_attr_put_search_ctx(ctx);
 		unmap_mft_record(mft_ni);
 		up_write(&mftbmp_ni->runlist.lock);
@@ -1550,6 +1564,7 @@ undo_alloc:
 static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
 {
 	s64 old_data_size, old_initialized_size;
+	unsigned long flags;
 	struct inode *mftbmp_vi;
 	ntfs_inode *mft_ni, *mftbmp_ni;
 	ntfs_attr_search_ctx *ctx;
@@ -1583,7 +1598,8 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
 		goto put_err_out;
 	}
 	a = ctx->attr;
-	old_data_size = mftbmp_vi->i_size;
+	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
+	old_data_size = i_size_read(mftbmp_vi);
 	old_initialized_size = mftbmp_ni->initialized_size;
 	/*
 	 * We can simply update the initialized_size before filling the space
@@ -1593,11 +1609,12 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
 	mftbmp_ni->initialized_size += 8;
 	a->data.non_resident.initialized_size =
 			cpu_to_sle64(mftbmp_ni->initialized_size);
-	if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) {
-		mftbmp_vi->i_size = mftbmp_ni->initialized_size;
+	if (mftbmp_ni->initialized_size > old_data_size) {
+		i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
 		a->data.non_resident.data_size =
-				cpu_to_sle64(mftbmp_vi->i_size);
+				cpu_to_sle64(mftbmp_ni->initialized_size);
 	}
+	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 	/* Ensure the changes make it to disk. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1636,22 +1653,28 @@ unm_err_out:
 		goto err_out;
 	}
 	a = ctx->attr;
+	write_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	mftbmp_ni->initialized_size = old_initialized_size;
 	a->data.non_resident.initialized_size =
 			cpu_to_sle64(old_initialized_size);
-	if (mftbmp_vi->i_size != old_data_size) {
-		mftbmp_vi->i_size = old_data_size;
+	if (i_size_read(mftbmp_vi) != old_data_size) {
+		i_size_write(mftbmp_vi, old_data_size);
 		a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
 	}
+	write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(mft_ni);
+#ifdef DEBUG
+	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
 			"data_size 0x%llx, initialized_size 0x%llx.",
 			(long long)mftbmp_ni->allocated_size,
-			(long long)mftbmp_vi->i_size,
+			(long long)i_size_read(mftbmp_vi),
 			(long long)mftbmp_ni->initialized_size);
+	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+#endif /* DEBUG */
 err_out:
 	return ret;
 }
@@ -1679,7 +1702,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 {
 	LCN lcn;
 	VCN old_last_vcn;
-	s64 min_nr, nr, ll = 0;
+	s64 min_nr, nr, ll;
+	unsigned long flags;
 	ntfs_inode *mft_ni;
 	runlist_element *rl, *rl2;
 	ntfs_attr_search_ctx *ctx = NULL;
@@ -1697,8 +1721,10 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	 * attribute cannot be zero so we are ok to do this.
 	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
-	rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >>
-			vol->cluster_size_bits, TRUE);
+	read_lock_irqsave(&mft_ni->size_lock, flags);
+	ll = mft_ni->allocated_size;
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
+	rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft data attribute.");
@@ -1710,8 +1736,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		return ret;
 	}
 	lcn = rl->lcn + rl->length;
-	ntfs_debug("Last lcn of mft data attribute is 0x%llx.",
-			(long long)lcn);
+	ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn);
 	/* Minimum allocation is one mft record worth of clusters. */
 	min_nr = vol->mft_record_size >> vol->cluster_size_bits;
 	if (!min_nr)
@@ -1721,12 +1746,13 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	if (!nr)
 		nr = min_nr;
 	/* Ensure we do not go above 2^32-1 mft records. */
-	if (unlikely((mft_ni->allocated_size +
-			(nr << vol->cluster_size_bits)) >>
+	read_lock_irqsave(&mft_ni->size_lock, flags);
+	ll = mft_ni->allocated_size;
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
+	if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
 			vol->mft_record_size_bits >= (1ll << 32))) {
 		nr = min_nr;
-		if (unlikely((mft_ni->allocated_size +
-				(nr << vol->cluster_size_bits)) >>
+		if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
 				vol->mft_record_size_bits >= (1ll << 32))) {
 			ntfs_warning(vol->sb, "Cannot allocate mft record "
 					"because the maximum number of inodes "
@@ -1875,9 +1901,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		}
 		a = ctx->attr;
 	}
+	write_lock_irqsave(&mft_ni->size_lock, flags);
 	mft_ni->allocated_size += nr << vol->cluster_size_bits;
 	a->data.non_resident.allocated_size =
 			cpu_to_sle64(mft_ni->allocated_size);
+	write_unlock_irqrestore(&mft_ni->size_lock, flags);
 	/* Ensure the changes make it to disk. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1892,7 +1920,9 @@ restore_undo_alloc:
 			CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
 		ntfs_error(vol->sb, "Failed to find last attribute extent of "
 				"mft data attribute.%s", es);
+		write_lock_irqsave(&mft_ni->size_lock, flags);
 		mft_ni->allocated_size += nr << vol->cluster_size_bits;
+		write_unlock_irqrestore(&mft_ni->size_lock, flags);
 		ntfs_attr_put_search_ctx(ctx);
 		unmap_mft_record(mft_ni);
 		up_write(&mft_ni->runlist.lock);
@@ -2036,6 +2066,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
  */
 static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
 {
+	loff_t i_size;
 	struct inode *mft_vi = vol->mft_ino;
 	struct page *page;
 	MFT_RECORD *m;
@@ -2051,10 +2082,11 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
 	index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
 	ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
 	/* The maximum valid index into the page cache for $MFT's data. */
-	end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT;
+	i_size = i_size_read(mft_vi);
+	end_index = i_size >> PAGE_CACHE_SHIFT;
 	if (unlikely(index >= end_index)) {
 		if (unlikely(index > end_index || ofs + vol->mft_record_size >=
-				(mft_vi->i_size & ~PAGE_CACHE_MASK))) {
+				(i_size & ~PAGE_CACHE_MASK))) {
 			ntfs_error(vol->sb, "Tried to format non-existing mft "
 					"record 0x%llx.", (long long)mft_no);
 			return -ENOENT;
@@ -2188,6 +2220,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
 		ntfs_inode *base_ni, MFT_RECORD **mrec)
 {
 	s64 ll, bit, old_data_initialized, old_data_size;
+	unsigned long flags;
 	struct inode *vi;
 	struct page *page;
 	ntfs_inode *mft_ni, *mftbmp_ni, *ni;
@@ -2237,9 +2270,13 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
 	 * the first 24 mft records as they are special and whilst they may not
 	 * be in use, we do not allocate from them.
 	 */
+	read_lock_irqsave(&mft_ni->size_lock, flags);
 	ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
-	if (mftbmp_ni->initialized_size << 3 > ll &&
-			mftbmp_ni->initialized_size > 3) {
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
+	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
+	old_data_initialized = mftbmp_ni->initialized_size;
+	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+	if (old_data_initialized << 3 > ll && old_data_initialized > 3) {
 		bit = ll;
 		if (bit < 24)
 			bit = 24;
@@ -2254,15 +2291,18 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
 	 * mft record that we can allocate.
 	 * Note: The smallest mft record we allocate is mft record 24.
 	 */
-	bit = mftbmp_ni->initialized_size << 3;
+	bit = old_data_initialized << 3;
 	if (unlikely(bit >= (1ll << 32)))
 		goto max_err_out;
+	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
+	old_data_size = mftbmp_ni->allocated_size;
 	ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
 			"data_size 0x%llx, initialized_size 0x%llx.",
-			(long long)mftbmp_ni->allocated_size,
-			(long long)vol->mftbmp_ino->i_size,
-			(long long)mftbmp_ni->initialized_size);
-	if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) {
+			(long long)old_data_size,
+			(long long)i_size_read(vol->mftbmp_ino),
+			(long long)old_data_initialized);
+	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+	if (old_data_initialized + 8 > old_data_size) {
 		/* Need to extend bitmap by one more cluster. */
 		ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
 		err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
@@ -2270,12 +2310,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
 			up_write(&vol->mftbmp_lock);
 			goto err_out;
 		}
+#ifdef DEBUG
+		read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 		ntfs_debug("Status of mftbmp after allocation extension: "
 				"allocated_size 0x%llx, data_size 0x%llx, "
 				"initialized_size 0x%llx.",
 				(long long)mftbmp_ni->allocated_size,
-				(long long)vol->mftbmp_ino->i_size,
+				(long long)i_size_read(vol->mftbmp_ino),
 				(long long)mftbmp_ni->initialized_size);
+		read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+#endif /* DEBUG */
 	}
 	/*
 	 * We now have sufficient allocated space, extend the initialized_size
@@ -2287,12 +2331,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
 		up_write(&vol->mftbmp_lock);
 		goto err_out;
 	}
+#ifdef DEBUG
+	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	ntfs_debug("Status of mftbmp after initialized extention: "
 			"allocated_size 0x%llx, data_size 0x%llx, "
 			"initialized_size 0x%llx.",
 			(long long)mftbmp_ni->allocated_size,
-			(long long)vol->mftbmp_ino->i_size,
+			(long long)i_size_read(vol->mftbmp_ino),
 			(long long)mftbmp_ni->initialized_size);
+	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
+#endif /* DEBUG */
 	ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
 found_free_rec:
 	/* @bit is the found free mft record, allocate it in the mft bitmap. */
@@ -2314,7 +2362,10 @@ have_alloc_rec:
 	 * parallel allocation could allocate the same mft record as this one.
 	 */
 	ll = (bit + 1) << vol->mft_record_size_bits;
-	if (ll <= mft_ni->initialized_size) {
+	read_lock_irqsave(&mft_ni->size_lock, flags);
+	old_data_initialized = mft_ni->initialized_size;
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
+	if (ll <= old_data_initialized) {
 		ntfs_debug("Allocated mft record already initialized.");
 		goto mft_rec_already_initialized;
 	}
@@ -2325,25 +2376,32 @@ have_alloc_rec:
 	 * actually traversed more than once when a freshly formatted volume is
 	 * first written to so it optimizes away nicely in the common case.
 	 */
+	read_lock_irqsave(&mft_ni->size_lock, flags);
+	old_data_size = mft_ni->allocated_size;
 	ntfs_debug("Status of mft data before extension: "
 			"allocated_size 0x%llx, data_size 0x%llx, "
 			"initialized_size 0x%llx.",
-			(long long)mft_ni->allocated_size,
-			(long long)vol->mft_ino->i_size,
+			(long long)old_data_size,
+			(long long)i_size_read(vol->mft_ino),
 			(long long)mft_ni->initialized_size);
-	while (ll > mft_ni->allocated_size) {
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
+	while (ll > old_data_size) {
 		err = ntfs_mft_data_extend_allocation_nolock(vol);
 		if (unlikely(err)) {
 			ntfs_error(vol->sb, "Failed to extend mft data "
 					"allocation.");
 			goto undo_mftbmp_alloc_nolock;
 		}
+#ifdef DEBUG
+		read_lock_irqsave(&mft_ni->size_lock, flags);
 		ntfs_debug("Status of mft data after allocation extension: "
 				"allocated_size 0x%llx, data_size 0x%llx, "
 				"initialized_size 0x%llx.",
 				(long long)mft_ni->allocated_size,
-				(long long)vol->mft_ino->i_size,
+				(long long)i_size_read(vol->mft_ino),
 				(long long)mft_ni->initialized_size);
+		read_unlock_irqrestore(&mft_ni->size_lock, flags);
+#endif /* DEBUG */
 	}
 	/*
 	 * Extend mft data initialized size (and data size of course) to reach
@@ -2352,6 +2410,7 @@ have_alloc_rec:
 	 * needed by ntfs_mft_record_format().  We will update the attribute
 	 * record itself in one fell swoop later on.
 	 */
+	write_lock_irqsave(&mft_ni->size_lock, flags);
 	old_data_initialized = mft_ni->initialized_size;
 	old_data_size = vol->mft_ino->i_size;
 	while (ll > mft_ni->initialized_size) {
@@ -2360,8 +2419,9 @@ have_alloc_rec:
 		new_initialized_size = mft_ni->initialized_size +
 				vol->mft_record_size;
 		mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
-		if (new_initialized_size > vol->mft_ino->i_size)
-			vol->mft_ino->i_size = new_initialized_size;
+		if (new_initialized_size > i_size_read(vol->mft_ino))
+			i_size_write(vol->mft_ino, new_initialized_size);
+		write_unlock_irqrestore(&mft_ni->size_lock, flags);
 		ntfs_debug("Initializing mft record 0x%llx.",
 				(long long)mft_no);
 		err = ntfs_mft_record_format(vol, mft_no);
@@ -2369,8 +2429,10 @@ have_alloc_rec:
 			ntfs_error(vol->sb, "Failed to format mft record.");
 			goto undo_data_init;
 		}
+		write_lock_irqsave(&mft_ni->size_lock, flags);
 		mft_ni->initialized_size = new_initialized_size;
 	}
+	write_unlock_irqrestore(&mft_ni->size_lock, flags);
 	record_formatted = TRUE;
 	/* Update the mft data attribute record to reflect the new sizes. */
 	m = map_mft_record(mft_ni);
@@ -2396,22 +2458,27 @@ have_alloc_rec:
 		goto undo_data_init;
 	}
 	a = ctx->attr;
+	read_lock_irqsave(&mft_ni->size_lock, flags);
 	a->data.non_resident.initialized_size =
 			cpu_to_sle64(mft_ni->initialized_size);
-	a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size);
+	a->data.non_resident.data_size =
+			cpu_to_sle64(i_size_read(vol->mft_ino));
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	/* Ensure the changes make it to disk. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(mft_ni);
+	read_lock_irqsave(&mft_ni->size_lock, flags);
 	ntfs_debug("Status of mft data after mft record initialization: "
 			"allocated_size 0x%llx, data_size 0x%llx, "
 			"initialized_size 0x%llx.",
 			(long long)mft_ni->allocated_size,
-			(long long)vol->mft_ino->i_size,
+			(long long)i_size_read(vol->mft_ino),
 			(long long)mft_ni->initialized_size);
-	BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size);
-	BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size);
+	BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
+	BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 mft_rec_already_initialized:
 	/*
 	 * We can finally drop the mft bitmap lock as the mft data attribute
@@ -2652,8 +2719,10 @@ mft_rec_already_initialized:
 	*mrec = m;
 	return ni;
 undo_data_init:
+	write_lock_irqsave(&mft_ni->size_lock, flags);
 	mft_ni->initialized_size = old_data_initialized;
-	vol->mft_ino->i_size = old_data_size;
+	i_size_write(vol->mft_ino, old_data_size);
+	write_unlock_irqrestore(&mft_ni->size_lock, flags);
 	goto undo_mftbmp_alloc_nolock;
 undo_mftbmp_alloc:
 	down_write(&vol->mftbmp_lock);

From 149f0c5200188a43f1fc11ca2fb14d8183013d10 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 12 Jan 2005 13:52:30 +0000
Subject: [PATCH 010/113] NTFS: Repeat a failed ntfs_truncate() in
 fs/ntfs/aops.c::ntfs_writepage()       and abort if it fails again.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/aops.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index a53212793809..ac65806ee515 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1237,19 +1237,30 @@ done:
 static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	loff_t i_size;
-	struct inode *vi;
-	ntfs_inode *ni, *base_ni;
+	struct inode *vi = page->mapping->host;
+	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
 	char *kaddr;
-	ntfs_attr_search_ctx *ctx;
-	MFT_RECORD *m;
+	ntfs_attr_search_ctx *ctx = NULL;
+	MFT_RECORD *m = NULL;
 	u32 attr_len;
 	int err;
 
 	BUG_ON(!PageLocked(page));
-
-	vi = page->mapping->host;
+	/*
+	 * If a previous ntfs_truncate() failed, repeat it and abort if it
+	 * fails again.
+	 */
+	if (unlikely(NInoTruncateFailed(ni))) {
+		down_write(&vi->i_alloc_sem);
+		err = ntfs_truncate(vi);
+		up_write(&vi->i_alloc_sem);
+		if (err || NInoTruncateFailed(ni)) {
+			if (!err)
+				err = -EIO;
+			goto err_out;
+		}
+	}
 	i_size = i_size_read(vi);
-
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
 			PAGE_CACHE_SHIFT)) {
@@ -1262,8 +1273,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 		ntfs_debug("Write outside i_size - truncated?");
 		return 0;
 	}
-	ni = NTFS_I(vi);
-
 	/* NInoNonResident() == NInoIndexAllocPresent() */
 	if (NInoNonResident(ni)) {
 		/*
@@ -1419,8 +1428,10 @@ err_out:
 		err = 0;
 	} else {
 		ntfs_error(vi->i_sb, "Resident attribute write failed with "
-				"error %i.  Setting page error flag.", err);
+				"error %i.", err);
 		SetPageError(page);
+		NVolSetErrors(ni->vol);
+		make_bad_inode(vi);
 	}
 	unlock_page(page);
 	if (ctx)

From 3834c3f227725e2395840aed82342bda4ee9d379 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 13 Jan 2005 11:04:39 +0000
Subject: [PATCH 011/113] NTFS: Fix stupid bug in fs/ntfs/mft.c introduced in
 last changeset.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/mft.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 20011e02f5b6..4e0bf39426cf 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2377,22 +2377,20 @@ have_alloc_rec:
 	 * first written to so it optimizes away nicely in the common case.
 	 */
 	read_lock_irqsave(&mft_ni->size_lock, flags);
-	old_data_size = mft_ni->allocated_size;
 	ntfs_debug("Status of mft data before extension: "
 			"allocated_size 0x%llx, data_size 0x%llx, "
 			"initialized_size 0x%llx.",
-			(long long)old_data_size,
+			(long long)mft_ni->allocated_size,
 			(long long)i_size_read(vol->mft_ino),
 			(long long)mft_ni->initialized_size);
-	read_unlock_irqrestore(&mft_ni->size_lock, flags);
-	while (ll > old_data_size) {
+	while (ll > mft_ni->allocated_size) {
+		read_unlock_irqrestore(&mft_ni->size_lock, flags);
 		err = ntfs_mft_data_extend_allocation_nolock(vol);
 		if (unlikely(err)) {
 			ntfs_error(vol->sb, "Failed to extend mft data "
 					"allocation.");
 			goto undo_mftbmp_alloc_nolock;
 		}
-#ifdef DEBUG
 		read_lock_irqsave(&mft_ni->size_lock, flags);
 		ntfs_debug("Status of mft data after allocation extension: "
 				"allocated_size 0x%llx, data_size 0x%llx, "
@@ -2400,9 +2398,8 @@ have_alloc_rec:
 				(long long)mft_ni->allocated_size,
 				(long long)i_size_read(vol->mft_ino),
 				(long long)mft_ni->initialized_size);
-		read_unlock_irqrestore(&mft_ni->size_lock, flags);
-#endif /* DEBUG */
 	}
+	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	/*
 	 * Extend mft data initialized size (and data size of course) to reach
 	 * the allocated mft record, formatting the mft records allong the way.

From 946929d813a3bde095678526dd037ab9ac6efe35 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 13 Jan 2005 15:26:29 +0000
Subject: [PATCH 012/113] NTFS: Fixup the resident attribute resizing code in  
     fs/ntfs/aops.c::ntfs_{prepare,commit}_write()() and re-enable it.      
 It should be safe now.  (Famous last words...)

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  5 +++++
 fs/ntfs/aops.c    | 21 ++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 051c6818aad8..8a249df2b5c2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -51,6 +51,11 @@ ToDo/Notes:
 	  value afterwards when reading the size of the bitmap inode.
 	- Use i_size_{read,write}() in fs/ntfs/{aops.c,mft.c} and protect
 	  access to the i_size and other size fields using the size_lock.
+	- Implement extension of resident files in the regular file write code
+	  paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()).  At present
+	  this only works until the data attribute becomes too big for the mft
+	  record after which we abort the write returning -EOPNOTSUPP from
+	  ntfs_prepare_write().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index ac65806ee515..92215228eeab 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -872,6 +872,7 @@ static int ntfs_write_mst_block(struct page *page,
 		if (likely(block < rec_block)) {
 			if (unlikely(block >= dblock)) {
 				clear_buffer_dirty(bh);
+				set_buffer_uptodate(bh);
 				continue;
 			}
 			/*
@@ -1830,6 +1831,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 		unsigned from, unsigned to)
 {
 	s64 new_size;
+	unsigned long flags;
 	struct inode *vi = page->mapping->host;
 	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
@@ -1903,12 +1905,6 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	/* If we do not need to resize the attribute allocation we are done. */
 	if (new_size <= i_size_read(vi))
 		goto done;
-
-	// FIXME: We abort for now as this code is not safe.
-	ntfs_error(vi->i_sb, "Changing the file size is not supported yet.  "
-			"Sorry.");
-	return -EOPNOTSUPP;
-
 	/* Map, pin, and lock the (base) mft record. */
 	if (!NInoAttr(ni))
 		base_ni = ni;
@@ -1937,7 +1933,17 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	a = ctx->attr;
 	/* The total length of the attribute value. */
 	attr_len = le32_to_cpu(a->data.resident.value_length);
-	BUG_ON(i_size_read(vi) != attr_len);
+	/* Fix an eventual previous failure of ntfs_commit_write(). */
+	read_lock_irqsave(&ni->size_lock, flags);
+	if (unlikely(ni->initialized_size < attr_len)) {
+		attr_len = ni->initialized_size;
+		a->data.resident.value_length = cpu_to_le32(attr_len);
+		BUG_ON(attr_len < i_size_read(vi));
+	}
+	read_unlock_irqrestore(&ni->size_lock, flags);
+	/* If we do not need to resize the attribute allocation we are done. */
+	if (new_size <= attr_len)
+		goto done_unm;
 	/* Check if new size is allowed in $AttrDef. */
 	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
 	if (unlikely(err)) {
@@ -1995,6 +2001,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	}
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
+done_unm:
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(base_ni);
 	/*

From f40661be038ce6ed9ef6a8b80307a9153bd95769 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 13 Jan 2005 16:03:38 +0000
Subject: [PATCH 013/113] NTFS: Optimise/reorganise some error handling code in
 fs/ntfs/aops.c.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/aops.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 92215228eeab..812d53e93354 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1392,14 +1392,11 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
 	i_size = i_size_read(vi);
-	kaddr = kmap_atomic(page, KM_USER0);
 	if (unlikely(attr_len > i_size)) {
-		/* Zero out of bounds area in the mft record. */
-		memset((u8*)ctx->attr + le16_to_cpu(
-				ctx->attr->data.resident.value_offset) +
-				i_size, 0, attr_len - i_size);
 		attr_len = i_size;
+		ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
 	}
+	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data from the page to the mft record. */
 	memcpy((u8*)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -1831,7 +1828,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 		unsigned from, unsigned to)
 {
 	s64 new_size;
-	unsigned long flags;
+	loff_t i_size;
 	struct inode *vi = page->mapping->host;
 	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
@@ -1934,13 +1931,11 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
 	/* The total length of the attribute value. */
 	attr_len = le32_to_cpu(a->data.resident.value_length);
 	/* Fix an eventual previous failure of ntfs_commit_write(). */
-	read_lock_irqsave(&ni->size_lock, flags);
-	if (unlikely(ni->initialized_size < attr_len)) {
-		attr_len = ni->initialized_size;
+	i_size = i_size_read(vi);
+	if (unlikely(attr_len > i_size)) {
+		attr_len = i_size;
 		a->data.resident.value_length = cpu_to_le32(attr_len);
-		BUG_ON(attr_len < i_size_read(vi));
 	}
-	read_unlock_irqrestore(&ni->size_lock, flags);
 	/* If we do not need to resize the attribute allocation we are done. */
 	if (new_size <= attr_len)
 		goto done_unm;

From c002f42543e155dd2b5b5039ea2637ab26c82513 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Feb 2005 12:02:56 +0000
Subject: [PATCH 014/113] NTFS: - Add disable_sparse mount option together with
 a per volume sparse 	enable bit which is set appropriately and a per inode
 sparse disable 	bit which is preset on some system file inodes as
 appropriate.       - Enforce that sparse support is disabled on NTFS volumes
 pre 3.0.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt | 11 ++++-
 fs/ntfs/ChangeLog                  | 10 +++--
 fs/ntfs/dir.c                      |  4 +-
 fs/ntfs/inode.c                    |  5 ++-
 fs/ntfs/inode.h                    |  4 +-
 fs/ntfs/layout.h                   |  4 +-
 fs/ntfs/super.c                    | 65 +++++++++++++++++++++---------
 fs/ntfs/sysctl.c                   |  4 +-
 fs/ntfs/volume.h                   |  6 ++-
 9 files changed, 80 insertions(+), 33 deletions(-)

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index f89b440fad1d..cb3cb8c06e9d 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -21,7 +21,7 @@ Overview
 ========
 
 Linux-NTFS comes with a number of user-space programs known as ntfsprogs.
-These include mkntfs, a full-featured ntfs file system format utility,
+These include mkntfs, a full-featured ntfs filesystem format utility,
 ntfsundelete used for recovering files that were unintentionally deleted
 from an NTFS volume and ntfsresize which is used to resize an NTFS partition.
 See the web site for more information.
@@ -149,7 +149,14 @@ case_sensitive=<BOOL>	If case_sensitive is specified, treat all file names as
 			name, if it exists.  If case_sensitive, you will need
 			to provide the correct case of the short file name.
 
-errors=opt		What to do when critical file system errors are found.
+disable_sparse=<BOOL>	If disable_sparse is specified, creation of sparse
+			regions, i.e. holes, inside files is disabled for the
+			volume (for the duration of this mount only).  By
+			default, creation of sparse regions is enabled, which
+			is consistent with the behaviour of traditional Unix
+			filesystems.
+
+errors=opt		What to do when critical filesystem errors are found.
 			Following values can be used for "opt":
 			  continue: DEFAULT, try to clean-up as much as
 				    possible, e.g. marking a corrupt inode as
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 8a249df2b5c2..3f78fff45448 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -56,6 +56,10 @@ ToDo/Notes:
 	  this only works until the data attribute becomes too big for the mft
 	  record after which we abort the write returning -EOPNOTSUPP from
 	  ntfs_prepare_write().
+	- Add disable_sparse mount option together with a per volume sparse
+	  enable bit which is set appropriately and a per inode sparse disable
+	  bit which is preset on some system file inodes as appropriate.
+	- Enforce that sparse support is disabled on NTFS volumes pre 3.0.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
@@ -1062,7 +1066,7 @@ tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/
 	- Further runlist merging work. (Richard Russon)
 	- Backwards compatibility for gcc-2.95. (Richard Russon)
 	- Update to kernel 2.5.5-pre1 and rediff the now tiny patch.
-	- Convert to new file system declaration using ->ntfs_get_sb() and
+	- Convert to new filesystem declaration using ->ntfs_get_sb() and
 	  replacing ntfs_read_super() with ntfs_fill_super().
 	- Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index
 	  overflow on 32-bit architectures.
@@ -1358,7 +1362,7 @@ tng-0.0.1 - The first useful version.
 	The driver is now actually useful! Yey. (-: It undoubtedly has got bugs
 	though and it doesn't implement accesssing compressed files yet. Also,
 	accessing files with attribute list attributes is not implemented yet
-	either. But for small or simple file systems it should work and allow
+	either. But for small or simple filesystems it should work and allow
 	you to list directories, use stat on directory entries and the file
 	system, open, read, mmap and llseek around in files. A big mile stone
 	has been reached!
@@ -1366,7 +1370,7 @@ tng-0.0.1 - The first useful version.
 tng-0.0.0 - Initial version tag.
 
 	Initial driver implementation. The driver can mount and umount simple
-	NTFS file systems (i.e. ones without attribute lists in the system
+	NTFS filesystems (i.e. ones without attribute lists in the system
 	files). If the mount fails there might be problems in the error handling
 	code paths, so be warned. Otherwise it seems to be loading the system
 	files nicely and the mft record read mapping/unmapping seems to be
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index d261ac4596f5..95c44577160c 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,7 +1,7 @@
 /**
  * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -610,7 +610,7 @@ dir_err_out:
 // TODO: (AIA)
 // The algorithm embedded in this code will be required for the time when we
 // want to support adding of entries to directories, where we require correct
-// collation of file names in order not to cause corruption of the file system.
+// collation of file names in order not to cause corruption of the filesystem.
 
 /**
  * ntfs_lookup_inode_by_name - find an inode in a directory given its name
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 6c631dbe0f4c..372c7fcdc068 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
 /**
  * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -1731,6 +1731,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 	/* Setup the data attribute. It is special as it is mst protected. */
 	NInoSetNonResident(ni);
 	NInoSetMstProtected(ni);
+	NInoSetSparseDisabled(ni);
 	ni->type = AT_DATA;
 	ni->name = NULL;
 	ni->name_len = 0;
@@ -2279,6 +2280,8 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
 		seq_printf(sf, ",case_sensitive");
 	if (NVolShowSystemFiles(vol))
 		seq_printf(sf, ",show_sys_files");
+	if (!NVolSparseEnabled(vol))
+		seq_printf(sf, ",disable_sparse");
 	for (i = 0; on_errors_arr[i].val; i++) {
 		if (on_errors_arr[i].val & vol->on_errors)
 			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 56b2d6ec33f7..6eb99777a722 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -2,7 +2,7 @@
  * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
  *	     the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -166,6 +166,7 @@ typedef enum {
 	NI_Sparse,		/* 1: Unnamed data attr is sparse (f).
 				   1: Create sparse files by default (d).
 				   1: Attribute is sparse (a). */
+	NI_SparseDisabled,	/* 1: May not create sparse regions. */
 	NI_TruncateFailed,	/* 1: Last ntfs_truncate() call failed. */
 } ntfs_inode_state_bits;
 
@@ -218,6 +219,7 @@ NINO_FNS(IndexAllocPresent)
 NINO_FNS(Compressed)
 NINO_FNS(Encrypted)
 NINO_FNS(Sparse)
+NINO_FNS(SparseDisabled)
 NINO_FNS(TruncateFailed)
 
 /*
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 47b338999921..7cb8806d4559 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2,7 +2,7 @@
  * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS
  *	      project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -834,7 +834,7 @@ enum {
 	/* Note, this is a copy of the corresponding bit from the mft record,
 	   telling us whether this file has a view index present (eg. object id
 	   index, quota index, one of the security indexes or the encrypting
-	   file system related indexes). */
+	   filesystem related indexes). */
 };
 
 typedef le32 FILE_ATTR_FLAGS;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0ad0c51e9eb9..b676cf6fd93c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
 /*
  * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2001,2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -41,7 +41,7 @@
 #include "malloc.h"
 #include "ntfs.h"
 
-/* Number of mounted file systems which have compression enabled. */
+/* Number of mounted filesystems which have compression enabled. */
 static unsigned long ntfs_nr_compression_users;
 
 /* A global default upcase table and a corresponding reference count. */
@@ -102,7 +102,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
 	gid_t gid = (gid_t)-1;
 	mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
 	int mft_zone_multiplier = -1, on_errors = -1;
-	int show_sys_files = -1, case_sensitive = -1;
+	int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
 	struct nls_table *nls_map = NULL, *old_nls;
 
 	/* I am lazy... (-8 */
@@ -162,6 +162,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
 		else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
 		else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
 		else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive)
+		else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse)
 		else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors,
 				on_errors_arr)
 		else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes"))
@@ -291,6 +292,21 @@ no_mount_options:
 		else
 			NVolClearCaseSensitive(vol);
 	}
+	if (disable_sparse != -1) {
+		if (disable_sparse)
+			NVolClearSparseEnabled(vol);
+		else {
+			if (!NVolSparseEnabled(vol) &&
+					vol->major_ver && vol->major_ver < 3)
+				ntfs_warning(vol->sb, "Not enabling sparse "
+						"support due to NTFS volume "
+						"version %i.%i (need at least "
+						"version 3.0).", vol->major_ver,
+						vol->minor_ver);
+			else
+				NVolSetSparseEnabled(vol);
+		}
+	}
 	return TRUE;
 needs_arg:
 	ntfs_error(vol->sb, "The %s option requires an argument.", p);
@@ -967,6 +983,7 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
 	tmp_ni = NTFS_I(tmp_ino);
 	/* The $MFTMirr, like the $MFT is multi sector transfer protected. */
 	NInoSetMstProtected(tmp_ni);
+	NInoSetSparseDisabled(tmp_ni);
 	/*
 	 * Set up our little cheat allowing us to reuse the async read io
 	 * completion handler for directories.
@@ -1122,6 +1139,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
 		/* ntfs_check_logfile() will have displayed error output. */
 		return FALSE;
 	}
+	NInoSetSparseDisabled(NTFS_I(tmp_ino));
 	vol->logfile_ino = tmp_ino;
 	ntfs_debug("Done.");
 	return TRUE;
@@ -1220,6 +1238,7 @@ static BOOL load_and_init_attrdef(ntfs_volume *vol)
 			iput(ino);
 		goto failed;
 	}
+	NInoSetSparseDisabled(NTFS_I(ino));
 	/* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
 	i_size = i_size_read(ino);
 	if (i_size <= 0 || i_size > 0x7fffffff)
@@ -1439,6 +1458,7 @@ static BOOL load_system_files(ntfs_volume *vol)
 			iput(vol->lcnbmp_ino);
 		goto bitmap_failed;
 	}
+	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
 	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
 		iput(vol->lcnbmp_ino);
 bitmap_failed:
@@ -1490,6 +1510,12 @@ get_ctx_vol_failed:
 	unmap_mft_record(NTFS_I(vol->vol_ino));
 	printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
 			vol->minor_ver);
+	if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
+		ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
+				"volume version %i.%i (need at least version "
+				"3.0).", vol->major_ver, vol->minor_ver);
+		NVolClearSparseEnabled(vol);
+	}
 #ifdef NTFS_RW
 	/* Make sure that no unsupported volume flags are set. */
 	if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -2033,7 +2059,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
 /**
  * __get_nr_free_mft_records - return the number of free inodes on a volume
  * @vol:	ntfs volume for which to obtain free inode count
- * @nr_free:	number of mft records in file system
+ * @nr_free:	number of mft records in filesystem
  * @max_index:	maximum number of pages containing set bits
  *
  * Calculate the number of free mft records (inodes) on the mounted NTFS
@@ -2138,13 +2164,13 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
 	/* Optimal transfer block size. */
 	sfs->f_bsize  = PAGE_CACHE_SIZE;
 	/*
-	 * Total data blocks in file system in units of f_bsize and since
+	 * Total data blocks in filesystem in units of f_bsize and since
 	 * inodes are also stored in data blocs ($MFT is a file) this is just
 	 * the total clusters.
 	 */
 	sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
 				PAGE_CACHE_SHIFT;
-	/* Free data blocks in file system in units of f_bsize. */
+	/* Free data blocks in filesystem in units of f_bsize. */
 	size	      = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
 				PAGE_CACHE_SHIFT;
 	if (size < 0LL)
@@ -2163,7 +2189,7 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
 	max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
 			+ 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
-	/* Number of inodes in file system (at this point in time). */
+	/* Number of inodes in filesystem (at this point in time). */
 	sfs->f_files = size;
 	/* Free inodes in fs (based on current total count). */
 	sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index);
@@ -2172,8 +2198,8 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
 	 * File system id. This is extremely *nix flavour dependent and even
 	 * within Linux itself all fs do their own thing. I interpret this to
 	 * mean a unique id associated with the mounted fs and not the id
-	 * associated with the file system driver, the latter is already given
-	 * by the file system type in sfs->f_type. Thus we use the 64-bit
+	 * associated with the filesystem driver, the latter is already given
+	 * by the filesystem type in sfs->f_type. Thus we use the 64-bit
 	 * volume serial number splitting it into two 32-bit parts. We enter
 	 * the least significant 32-bits in f_fsid[0] and the most significant
 	 * 32-bits in f_fsid[1].
@@ -2259,18 +2285,18 @@ static struct export_operations ntfs_export_ops = {
 };
 
 /**
- * ntfs_fill_super - mount an ntfs files system
- * @sb:		super block of ntfs file system to mount
+ * ntfs_fill_super - mount an ntfs filesystem
+ * @sb:		super block of ntfs filesystem to mount
  * @opt:	string containing the mount options
  * @silent:	silence error output
  *
  * ntfs_fill_super() is called by the VFS to mount the device described by @sb
- * with the mount otions in @data with the NTFS file system.
+ * with the mount otions in @data with the NTFS filesystem.
  *
  * If @silent is true, remain silent even if errors are detected. This is used
- * during bootup, when the kernel tries to mount the root file system with all
- * registered file systems one after the other until one succeeds. This implies
- * that all file systems except the correct one will quite correctly and
+ * during bootup, when the kernel tries to mount the root filesystem with all
+ * registered filesystems one after the other until one succeeds. This implies
+ * that all filesystems except the correct one will quite correctly and
  * expectedly return an error, but nobody wants to see error messages when in
  * fact this is what is supposed to happen.
  *
@@ -2330,6 +2356,9 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 
 	unlock_kernel();
 
+	/* By default, enable sparse support. */
+	NVolSetSparseEnabled(vol);
+
 	/* Important to get the mount options dealt with now. */
 	if (!parse_options(vol, (char*)opt))
 		goto err_out_now;
@@ -2711,7 +2740,7 @@ static int __init init_ntfs_fs(void)
 		ntfs_debug("NTFS driver registered successfully.");
 		return 0; /* Success! */
 	}
-	printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n");
+	printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
 
 sysctl_err_out:
 	kmem_cache_destroy(ntfs_big_inode_cache);
@@ -2725,7 +2754,7 @@ actx_err_out:
 	kmem_cache_destroy(ntfs_index_ctx_cache);
 ictx_err_out:
 	if (!err) {
-		printk(KERN_CRIT "NTFS: Aborting NTFS file system driver "
+		printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver "
 				"registration...\n");
 		err = -ENOMEM;
 	}
@@ -2765,7 +2794,7 @@ static void __exit exit_ntfs_fs(void)
 }
 
 MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2004 Anton Altaparmakov");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov");
 MODULE_VERSION(NTFS_VERSION);
 MODULE_LICENSE("GPL");
 #ifdef DEBUG
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 75067e4f3036..1c23138d00b3 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -3,7 +3,7 @@
  *	      the Linux-NTFS project. Adapted from the old NTFS driver,
  *	      Copyright (C) 1997 Martin von L�wis, R�gis Duchesne
  *
- * Copyright (c) 2002-2004 Anton Altaparmakov
+ * Copyright (c) 2002-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -67,7 +67,7 @@ int ntfs_sysctl(int add)
 			return -ENOMEM;
 #ifdef CONFIG_PROC_FS
 		/*
-		 * If the proc file system is in use and we are a module, need
+		 * If the proc filesystem is in use and we are a module, need
 		 * to set the owner of our proc entry to our module. In the
 		 * non-modular case, THIS_MODULE is NULL, so this is ok.
 		 */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 4b97fa8635a8..62be73ad0156 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -2,7 +2,7 @@
  * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
  *	      of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -54,7 +54,7 @@ typedef struct {
 	mode_t dmask;			/* The mask for directory
 					   permissions. */
 	u8 mft_zone_multiplier;		/* Initial mft zone multiplier. */
-	u8 on_errors;			/* What to do on file system errors. */
+	u8 on_errors;			/* What to do on filesystem errors. */
 	/* NTFS bootsector provided information. */
 	u16 sector_size;		/* in bytes */
 	u8 sector_size_bits;		/* log2(sector_size) */
@@ -141,6 +141,7 @@ typedef enum {
 				      file names in WIN32 namespace. */
 	NV_LogFileEmpty,	/* 1: $LogFile journal is empty. */
 	NV_QuotaOutOfDate,	/* 1: $Quota is out of date. */
+	NV_SparseEnabled,	/* 1: May create sparse files. */
 } ntfs_volume_flags;
 
 /*
@@ -167,5 +168,6 @@ NVOL_FNS(ShowSystemFiles)
 NVOL_FNS(CaseSensitive)
 NVOL_FNS(LogFileEmpty)
 NVOL_FNS(QuotaOutOfDate)
+NVOL_FNS(SparseEnabled)
 
 #endif /* _LINUX_NTFS_VOLUME_H */

From 1a0df15acdae065789446aca83021c72b71db9a5 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Feb 2005 12:04:36 +0000
Subject: [PATCH 015/113] NTFS: Fix a bug in
 fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in       the creation of
 the unmapped runlist element for the base attribute       extent.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/runlist.c | 56 ++++++++++++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 3f78fff45448..b40c334e616f 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -60,6 +60,9 @@ ToDo/Notes:
 	  enable bit which is set appropriately and a per inode sparse disable
 	  bit which is preset on some system file inodes as appropriate.
 	- Enforce that sparse support is disabled on NTFS volumes pre 3.0.
+	- Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
+	  the creation of the unmapped runlist element for the base attribute
+	  extent.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 8438fb1da219..5244687dfaa8 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1,7 +1,7 @@
 /**
  * runlist.c - NTFS runlist handling code.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -59,7 +59,7 @@ static inline void ntfs_rl_mc(runlist_element *dstbase, int dst,
  *
  * As the runlists grow, more memory will be required.  To prevent the
  * kernel having to allocate and reallocate large numbers of small bits of
- * memory, this function returns and entire page of memory.
+ * memory, this function returns an entire page of memory.
  *
  * It is up to the caller to serialize access to the runlist @rl.
  *
@@ -855,30 +855,42 @@ mpa_err:
 	if (!attr->data.non_resident.lowest_vcn) {
 		VCN max_cluster;
 
-		max_cluster = (sle64_to_cpu(
+		max_cluster = ((sle64_to_cpu(
 				attr->data.non_resident.allocated_size) +
 				vol->cluster_size - 1) >>
-				vol->cluster_size_bits;
+				vol->cluster_size_bits) - 1;
 		/*
-		 * If there is a difference between the highest_vcn and the
-		 * highest cluster, the runlist is either corrupt or, more
-		 * likely, there are more extents following this one.
+		 * A highest_vcn of zero means this is a single extent
+		 * attribute so simply terminate the runlist with LCN_ENOENT).
 		 */
-		if (deltaxcn < --max_cluster) {
-			ntfs_debug("More extents to follow; deltaxcn = 0x%llx, "
-					"max_cluster = 0x%llx",
-					(unsigned long long)deltaxcn,
-					(unsigned long long)max_cluster);
-			rl[rlpos].vcn = vcn;
-			vcn += rl[rlpos].length = max_cluster - deltaxcn;
-			rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
-			rlpos++;
-		} else if (unlikely(deltaxcn > max_cluster)) {
-			ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = "
-					"0x%llx, max_cluster = 0x%llx",
-					(unsigned long long)deltaxcn,
-					(unsigned long long)max_cluster);
-			goto mpa_err;
+		if (deltaxcn) {
+			/*
+			 * If there is a difference between the highest_vcn and
+			 * the highest cluster, the runlist is either corrupt
+			 * or, more likely, there are more extents following
+			 * this one.
+			 */
+			if (deltaxcn < max_cluster) {
+				ntfs_debug("More extents to follow; deltaxcn "
+						"= 0x%llx, max_cluster = "
+						"0x%llx",
+						(unsigned long long)deltaxcn,
+						(unsigned long long)
+						max_cluster);
+				rl[rlpos].vcn = vcn;
+				vcn += rl[rlpos].length = max_cluster -
+						deltaxcn;
+				rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+				rlpos++;
+			} else if (unlikely(deltaxcn > max_cluster)) {
+				ntfs_error(vol->sb, "Corrupt attribute.  "
+						"deltaxcn = 0x%llx, "
+						"max_cluster = 0x%llx",
+						(unsigned long long)deltaxcn,
+						(unsigned long long)
+						max_cluster);
+				goto mpa_err;
+			}
 		}
 		rl[rlpos].lcn = LCN_ENOENT;
 	} else /* Not the base extent. There may be more extents to follow. */

From b6ad6c52fe36ab35d0fe28c064f59de2ba670c2a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Tue, 15 Feb 2005 10:08:43 +0000
Subject: [PATCH 016/113] NTFS: - Split ntfs_map_runlist() into
 ntfs_map_runlist() and a non-locking 	helper ntfs_map_runlist_nolock() which
 is used by ntfs_map_runlist(). 	This allows us to map runlist
 fragments with the runlist lock already 	held without having to drop
 and reacquire it around the call.  Adapt 	all callers.       - Change
 ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked 
 runlist.  This allows us to find runlist elements with the runlist 	lock
 already held without having to drop and reacquire it around the 	call. 
 Adapt all callers.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |   9 ++++
 fs/ntfs/aops.c     |  34 +++++---------
 fs/ntfs/attrib.c   | 107 ++++++++++++++++++++++++++-------------------
 fs/ntfs/attrib.h   |   5 ++-
 fs/ntfs/lcnalloc.c |  39 +++++------------
 fs/ntfs/mft.c      |  38 +++++++++-------
 6 files changed, 119 insertions(+), 113 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index b40c334e616f..9d42393c16c0 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -63,6 +63,15 @@ ToDo/Notes:
 	- Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
 	  the creation of the unmapped runlist element for the base attribute
 	  extent.
+	- Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
+	  helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
+	  This allows us to map runlist fragments with the runlist lock already
+	  held without having to drop and reacquire it around the call.  Adapt
+	  all callers.
+	- Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
+	  runlist.  This allows us to find runlist elements with the runlist
+	  lock already held without having to drop and reacquire it around the
+	  call.  Adapt all callers.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 812d53e93354..2b4b8b9e8796 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
  * aops.c - NTFS kernel address space operations and page cache handling.
  *	    Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -135,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 					i * rec_size), rec_size);
 		flush_dcache_page(page);
 		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
-		if (likely(!PageError(page) && page_uptodate))
+		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	}
 	unlock_page(page);
@@ -347,11 +347,11 @@ handle_zblock:
  */
 static int ntfs_readpage(struct file *file, struct page *page)
 {
-	loff_t i_size;
 	ntfs_inode *ni, *base_ni;
 	u8 *kaddr;
 	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *mrec;
+	unsigned long flags;
 	u32 attr_len;
 	int err = 0;
 
@@ -389,9 +389,9 @@ static int ntfs_readpage(struct file *file, struct page *page)
 	 * Attribute is resident, implying it is not compressed or encrypted.
 	 * This also means the attribute is smaller than an mft record and
 	 * hence smaller than a page, so can simply zero out any pages with
-	 * index above 0.  We can also do this if the file size is 0.
+	 * index above 0.
 	 */
-	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
+	if (unlikely(page->index > 0)) {
 		kaddr = kmap_atomic(page, KM_USER0);
 		memset(kaddr, 0, PAGE_CACHE_SIZE);
 		flush_dcache_page(page);
@@ -418,9 +418,10 @@ static int ntfs_readpage(struct file *file, struct page *page)
 	if (unlikely(err))
 		goto put_unm_err_out;
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
-	i_size = i_size_read(VFS_I(ni));
-	if (unlikely(attr_len > i_size))
-		attr_len = i_size;
+	read_lock_irqsave(&ni->size_lock, flags);
+	if (unlikely(attr_len > ni->initialized_size))
+		attr_len = ni->initialized_size;
+	read_unlock_irqrestore(&ni->size_lock, flags);
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data to the page. */
 	memcpy(kaddr, (u8*)ctx->attr +
@@ -1247,20 +1248,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 	int err;
 
 	BUG_ON(!PageLocked(page));
-	/*
-	 * If a previous ntfs_truncate() failed, repeat it and abort if it
-	 * fails again.
-	 */
-	if (unlikely(NInoTruncateFailed(ni))) {
-		down_write(&vi->i_alloc_sem);
-		err = ntfs_truncate(vi);
-		up_write(&vi->i_alloc_sem);
-		if (err || NInoTruncateFailed(ni)) {
-			if (!err)
-				err = -EIO;
-			goto err_out;
-		}
-	}
 	i_size = i_size_read(vi);
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
@@ -1490,13 +1477,12 @@ static int ntfs_prepare_nonresident_write(struct page *page,
 
 	read_lock_irqsave(&ni->size_lock, flags);
 	/*
-	 * The first out of bounds block for the allocated size. No need to
+	 * The first out of bounds block for the allocated size.  No need to
 	 * round up as allocated_size is in multiples of cluster size and the
 	 * minimum cluster size is 512 bytes, which is equal to the smallest
 	 * blocksize.
 	 */
 	ablock = ni->allocated_size >> blocksize_bits;
-
 	i_size = i_size_read(vi);
 	initialized_size = ni->initialized_size;
 	read_unlock_irqrestore(&ni->size_lock, flags);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 7d668466dcd7..7a16f7ca76d8 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
 /**
  * attrib.c - NTFS attribute operations.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -30,7 +30,7 @@
 #include "types.h"
 
 /**
- * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
  * @ni:		ntfs inode for which to map (part of) a runlist
  * @vcn:	map runlist part containing this vcn
  *
@@ -38,24 +38,23 @@
  *
  * Return 0 on success and -errno on error.
  *
- * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- *	    - This function takes the lock for writing and modifies the runlist.
+ * Locking: - The runlist must be locked for writing.
+ *	    - This function modifies the runlist.
  */
-int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 {
 	ntfs_inode *base_ni;
-	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *mrec;
+	ntfs_attr_search_ctx *ctx;
+	runlist_element *rl;
 	int err = 0;
 
 	ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
 			(unsigned long long)vcn);
-
 	if (!NInoAttr(ni))
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-
 	mrec = map_mft_record(base_ni);
 	if (IS_ERR(mrec))
 		return PTR_ERR(mrec);
@@ -66,15 +65,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 	}
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, vcn, NULL, 0, ctx);
-	if (unlikely(err))
-		goto put_err_out;
-
-	down_write(&ni->runlist.lock);
-	/* Make sure someone else didn't do the work while we were sleeping. */
-	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
-			LCN_RL_NOT_MAPPED)) {
-		runlist_element *rl;
-
+	if (likely(!err)) {
 		rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr,
 				ni->runlist.rl);
 		if (IS_ERR(rl))
@@ -82,9 +73,6 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 		else
 			ni->runlist.rl = rl;
 	}
-	up_write(&ni->runlist.lock);
-
-put_err_out:
 	ntfs_attr_put_search_ctx(ctx);
 err_out:
 	unmap_mft_record(base_ni);
@@ -92,17 +80,45 @@ err_out:
 }
 
 /**
- * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode
- * @ni:		ntfs inode describing the runlist to search
- * @vcn:	vcn to find
- * @need_write:	if false, lock for reading and if true, lock for writing
+ * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * @ni:		ntfs inode for which to map (part of) a runlist
+ * @vcn:	map runlist part containing this vcn
+ *
+ * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Locking: - The runlist must be unlocked on entry and is unlocked on return.
+ *	    - This function takes the runlist lock for writing and modifies the
+ *	      runlist.
+ */
+int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+{
+	int err = 0;
+
+	down_write(&ni->runlist.lock);
+	/* Make sure someone else didn't do the work while we were sleeping. */
+	if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
+			LCN_RL_NOT_MAPPED))
+		err = ntfs_map_runlist_nolock(ni, vcn);
+	up_write(&ni->runlist.lock);
+	return err;
+}
+
+/**
+ * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
+ * @ni:			ntfs inode describing the runlist to search
+ * @vcn:		vcn to find
+ * @write_locked:	true if the runlist is locked for writing
  *
  * Find the virtual cluster number @vcn in the runlist described by the ntfs
  * inode @ni and return the address of the runlist element containing the @vcn.
- * The runlist is left locked and the caller has to unlock it.  If @need_write
- * is true, the runlist is locked for writing and if @need_write is false, the
- * runlist is locked for reading.  In the error case, the runlist is not left
- * locked.
+ * The runlist is left locked and the caller has to unlock it.  In the error
+ * case, the runlist is left in the same locking state as on entry.
+ *
+ * Note if @write_locked is FALSE the lock may be dropped inside the function
+ * so you cannot rely on the runlist still being the same when this function
+ * returns.
  *
  * Note you need to distinguish between the lcn of the returned runlist element
  * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
@@ -124,28 +140,24 @@ err_out:
  *	      true, it is locked for writing.  Otherwise is is locked for
  *	      reading.
  */
-runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
-		const BOOL need_write)
+runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked)
 {
 	runlist_element *rl;
 	int err = 0;
 	BOOL is_retry = FALSE;
 
-	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.",
+	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
 			ni->mft_no, (unsigned long long)vcn,
-			!need_write ? "read" : "writ");
+			write_locked ? "write" : "read");
 	BUG_ON(!ni);
 	BUG_ON(!NInoNonResident(ni));
 	BUG_ON(vcn < 0);
-lock_retry_remap:
-	if (!need_write)
-		down_read(&ni->runlist.lock);
-	else
-		down_write(&ni->runlist.lock);
+retry_remap:
 	rl = ni->runlist.rl;
 	if (likely(rl && vcn >= rl[0].vcn)) {
 		while (likely(rl->length)) {
-			if (likely(vcn < rl[1].vcn)) {
+			if (unlikely(vcn < rl[1].vcn)) {
 				if (likely(rl->lcn >= LCN_HOLE)) {
 					ntfs_debug("Done.");
 					return rl;
@@ -161,19 +173,23 @@ lock_retry_remap:
 				err = -EIO;
 		}
 	}
-	if (!need_write)
-		up_read(&ni->runlist.lock);
-	else
-		up_write(&ni->runlist.lock);
 	if (!err && !is_retry) {
 		/*
 		 * The @vcn is in an unmapped region, map the runlist and
 		 * retry.
 		 */
-		err = ntfs_map_runlist(ni, vcn);
+		if (!write_locked) {
+			up_read(&ni->runlist.lock);
+			down_write(&ni->runlist.lock);
+		}
+		err = ntfs_map_runlist_nolock(ni, vcn);
+		if (!write_locked) {
+			up_write(&ni->runlist.lock);
+			down_read(&ni->runlist.lock);
+		}
 		if (likely(!err)) {
 			is_retry = TRUE;
-			goto lock_retry_remap;
+			goto retry_remap;
 		}
 		/*
 		 * -EINVAL and -ENOENT coming from a failed mapping attempt are
@@ -184,7 +200,8 @@ lock_retry_remap:
 			err = -EIO;
 	} else if (!err)
 		err = -EIO;
-	ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
+	if (err != -ENOENT)
+		ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
 	return ERR_PTR(err);
 }
 
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index e0c2c6c81bc0..3eb451657025 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -60,10 +60,11 @@ typedef struct {
 	ATTR_RECORD *base_attr;
 } ntfs_attr_search_ctx;
 
+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
 extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
 
-extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
-		const BOOL need_write);
+extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked);
 
 int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 		const u32 name_len, const IGNORE_CASE_BOOL ic,
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 5346596fa871..8db4492b139c 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -849,7 +849,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 	total_freed = real_freed = 0;
 
 	/* This returns with ni->runlist locked for reading on success. */
-	rl = ntfs_find_vcn(ni, start_vcn, FALSE);
+	down_read(&ni->runlist.lock);
+	rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@@ -863,7 +864,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 			ntfs_error(vol->sb, "First runlist element has "
 					"invalid lcn, aborting.");
 		err = -EIO;
-		goto unl_err_out;
+		goto err_out;
 	}
 	/* Find the starting cluster inside the run that needs freeing. */
 	delta = start_vcn - rl->vcn;
@@ -881,7 +882,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 			if (!is_rollback)
 				ntfs_error(vol->sb, "Failed to clear first run "
 						"(error %i), aborting.", err);
-			goto unl_err_out;
+			goto err_out;
 		}
 		/* We have freed @to_free real clusters. */
 		real_freed = to_free;
@@ -901,30 +902,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 		if (unlikely(rl->lcn < LCN_HOLE)) {
 			VCN vcn;
 
-			/*
-			 * Attempt to map runlist, dropping runlist lock for
-			 * the duration.
-			 */
+			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			up_read(&ni->runlist.lock);
-			err = ntfs_map_runlist(ni, vcn);
-			if (err) {
-				if (!is_rollback)
-					ntfs_error(vol->sb, "Failed to map "
-							"runlist fragment.");
-				if (err == -EINVAL || err == -ENOENT)
-					err = -EIO;
-				goto err_out;
-			}
-			/*
-			 * This returns with ni->runlist locked for reading on
-			 * success.
-			 */
-			rl = ntfs_find_vcn(ni, vcn, FALSE);
+			rl = ntfs_find_vcn_nolock(ni, vcn, FALSE);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
-					ntfs_error(vol->sb, "Failed to find "
+					ntfs_error(vol->sb, "Failed to map "
+							"runlist fragment or "
+							"failed to find "
 							"subsequent runlist "
 							"element.");
 				goto err_out;
@@ -937,7 +923,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 							(unsigned long long)
 							rl->lcn);
 				err = -EIO;
-				goto unl_err_out;
+				goto err_out;
 			}
 		}
 		/* The number of clusters in this run that need freeing. */
@@ -953,7 +939,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 				if (!is_rollback)
 					ntfs_error(vol->sb, "Failed to clear "
 							"subsequent run.");
-				goto unl_err_out;
+				goto err_out;
 			}
 			/* We have freed @to_free real clusters. */
 			real_freed += to_free;
@@ -974,9 +960,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 	/* We are done.  Return the number of actually freed clusters. */
 	ntfs_debug("Done.");
 	return real_freed;
-unl_err_out:
-	up_read(&ni->runlist.lock);
 err_out:
+	up_read(&ni->runlist.lock);
 	if (is_rollback)
 		return err;
 	/* If no real clusters were freed, no need to rollback. */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 4e0bf39426cf..0975d738834c 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
 /**
  * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -287,7 +287,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
 			}
 			unmap_mft_record(ni);
 			ntfs_error(base_ni->vol->sb, "Found stale extent mft "
-					"reference! Corrupt file system. "
+					"reference! Corrupt filesystem. "
 					"Run chkdsk.");
 			return ERR_PTR(-EIO);
 		}
@@ -318,7 +318,7 @@ map_err_out:
 	/* Verify the sequence number if it is present. */
 	if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
 		ntfs_error(base_ni->vol->sb, "Found stale extent mft "
-				"reference! Corrupt file system. Run chkdsk.");
+				"reference! Corrupt filesystem. Run chkdsk.");
 		destroy_ni = TRUE;
 		m = ERR_PTR(-EIO);
 		goto unm_err_out;
@@ -1292,19 +1292,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	/*
 	 * Determine the last lcn of the mft bitmap.  The allocated size of the
 	 * mft bitmap cannot be zero so we are ok to do this.
-	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
+	down_write(&mftbmp_ni->runlist.lock);
 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	ll = mftbmp_ni->allocated_size;
 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-	rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+	rl = ntfs_find_vcn_nolock(mftbmp_ni,
+			(ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+		up_write(&mftbmp_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft bitmap attribute.");
-		if (!IS_ERR(rl)) {
-			up_write(&mftbmp_ni->runlist.lock);
+		if (!IS_ERR(rl))
 			ret = -EIO;
-		} else
+		else
 			ret = PTR_ERR(rl);
 		return ret;
 	}
@@ -1428,6 +1429,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 		// TODO: Deal with this by moving this extent to a new mft
 		// record or by starting a new extent in a new mft record or by
 		// moving other attributes out of this mft record.
+		// Note: It will need to be a special mft record and if none of
+		// those are available it gets rather complicated...
 		ntfs_error(vol->sb, "Not enough space in this mft record to "
 				"accomodate extended mft bitmap attribute "
 				"extent.  Cannot handle this yet.");
@@ -1719,19 +1722,20 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	 * Determine the preferred allocation location, i.e. the last lcn of
 	 * the mft data attribute.  The allocated size of the mft data
 	 * attribute cannot be zero so we are ok to do this.
-	 * ntfs_find_vcn() returns the runlist locked on success.
 	 */
+	down_write(&mft_ni->runlist.lock);
 	read_lock_irqsave(&mft_ni->size_lock, flags);
 	ll = mft_ni->allocated_size;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
-	rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+	rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits,
+			TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+		up_write(&mft_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
 				"cluster of mft data attribute.");
-		if (!IS_ERR(rl)) {
-			up_write(&mft_ni->runlist.lock);
+		if (!IS_ERR(rl))
 			ret = -EIO;
-		} else
+		else
 			ret = PTR_ERR(rl);
 		return ret;
 	}
@@ -1858,7 +1862,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		// moving other attributes out of this mft record.
 		// Note: Use the special reserved mft records and ensure that
 		// this extent is not required to find the mft record in
-		// question.
+		// question.  If no free special records left we would need to
+		// move an existing record away, insert ours in its place, and
+		// then place the moved record into the newly allocated space
+		// and we would then need to update all references to this mft
+		// record appropriately.  This is rather complicated...
 		ntfs_error(vol->sb, "Not enough space in this mft record to "
 				"accomodate extended mft data attribute "
 				"extent.  Cannot handle this yet.");
@@ -2021,7 +2029,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
 				"reports this as corruption, please email "
 				"linux-ntfs-dev@lists.sourceforge.net stating "
 				"that you saw this message and that the "
-				"modified file system created was corrupt.  "
+				"modified filesystem created was corrupt.  "
 				"Thank you.");
 	}
 	/* Set the update sequence number to 1. */

From d8ec785e0bf2941ed546711c2f240a3e030c39c7 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Fri, 18 Feb 2005 09:23:39 +0000
Subject: [PATCH 017/113] NTFS: Change time to u64 in time.h::ntfs2utc() as it
 otherwise generates a       warning in the do_div() call on sparc32.  Thanks
 to Meelis Roos for the       report and analysis of the warning.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 3 +++
 fs/ntfs/time.h    | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9d42393c16c0..0ef1cd329096 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -72,6 +72,9 @@ ToDo/Notes:
 	  runlist.  This allows us to find runlist elements with the runlist
 	  lock already held without having to drop and reacquire it around the
 	  call.  Adapt all callers.
+	- Change time to u64 time.h::ntfs2utc() as it otherwise generates a
+	  warning in the do_div() call on sparc32.  Thanks to Meelis Roos for
+	  the report and analysis of the warning.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h
index a09a51dabe4e..01233989d5d1 100644
--- a/fs/ntfs/time.h
+++ b/fs/ntfs/time.h
@@ -1,7 +1,7 @@
 /*
  * time.h - NTFS time conversion functions.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -87,7 +87,7 @@ static inline struct timespec ntfs2utc(const sle64 time)
 	struct timespec ts;
 
 	/* Subtract the NTFS time offset. */
-	s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET;
+	u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET);
 	/*
 	 * Convert the time to 1-second intervals and the remainder to
 	 * 1-nano-second intervals.

From 37e4c13b987a7923ec13bda7368901b3e094fecb Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Fri, 18 Feb 2005 10:03:13 +0000
Subject: [PATCH 018/113] NTFS: Fix a nasty runlist merge bug when merging two
 holes.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 3 ++-
 fs/ntfs/runlist.c | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 0ef1cd329096..53bb6c1404f2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -72,9 +72,10 @@ ToDo/Notes:
 	  runlist.  This allows us to find runlist elements with the runlist
 	  lock already held without having to drop and reacquire it around the
 	  call.  Adapt all callers.
-	- Change time to u64 time.h::ntfs2utc() as it otherwise generates a
+	- Change time to u64 in time.h::ntfs2utc() as it otherwise generates a
 	  warning in the do_div() call on sparc32.  Thanks to Meelis Roos for
 	  the report and analysis of the warning.
+	- Fix a nasty runlist merge bug when merging two holes.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 5244687dfaa8..1b344dd4d407 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -113,8 +113,11 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	if ((dst->lcn < 0) || (src->lcn < 0))     /* Are we merging holes? */
+	if ((dst->lcn < 0) || (src->lcn < 0)) {   /* Are we merging holes? */
+		if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
+			return TRUE;
 		return FALSE;
+	}
 	if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
 		return FALSE;
 	if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */

From 5ae9fcf8f329baba4bada8719cb0337eef083a1a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 2 Mar 2005 17:03:24 +0000
Subject: [PATCH 019/113] NTFS: - Set the ntfs_inode->allocated_size to the
 real allocated size in the         mft record for resident attributes
 (fs/ntfs/inode.c).       - Small readability cleanup to use "a" instead of
 "ctx->attr"         everywhere (fs/ntfs/inode.c).

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   4 +
 fs/ntfs/inode.c   | 344 +++++++++++++++++++++++-----------------------
 2 files changed, 174 insertions(+), 174 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 53bb6c1404f2..ad2c4e88f053 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -76,6 +76,10 @@ ToDo/Notes:
 	  warning in the do_div() call on sparc32.  Thanks to Meelis Roos for
 	  the report and analysis of the warning.
 	- Fix a nasty runlist merge bug when merging two holes.
+	- Set the ntfs_inode->allocated_size to the real allocated size in the
+	  mft record for resident attributes (fs/ntfs/inode.c).
+	- Small readability cleanup to use "a" instead of "ctx->attr"
+	  everywhere (fs/ntfs/inode.c).
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 372c7fcdc068..cfca17384115 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -525,6 +525,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
 	ntfs_inode *ni;
 	MFT_RECORD *m;
+	ATTR_RECORD *a;
 	STANDARD_INFORMATION *si;
 	ntfs_attr_search_ctx *ctx;
 	int err = 0;
@@ -633,9 +634,10 @@ static int ntfs_read_locked_inode(struct inode *vi)
 		}
 		goto unm_err_out;
 	}
+	a = ctx->attr;
 	/* Get the standard information attribute value. */
-	si = (STANDARD_INFORMATION*)((char*)ctx->attr +
-			le16_to_cpu(ctx->attr->data.resident.value_offset));
+	si = (STANDARD_INFORMATION*)((u8*)a +
+			le16_to_cpu(a->data.resident.value_offset));
 
 	/* Transfer information from the standard information into vi. */
 	/*
@@ -674,15 +676,16 @@ static int ntfs_read_locked_inode(struct inode *vi)
 			goto skip_attr_list_load;
 		ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
 		NInoSetAttrList(ni);
-		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
-				ctx->attr->flags & ATTR_COMPRESSION_MASK ||
-				ctx->attr->flags & ATTR_IS_SPARSE) {
+		a = ctx->attr;
+		if (a->flags & ATTR_IS_ENCRYPTED ||
+				a->flags & ATTR_COMPRESSION_MASK ||
+				a->flags & ATTR_IS_SPARSE) {
 			ntfs_error(vi->i_sb, "Attribute list attribute is "
 					"compressed/encrypted/sparse.");
 			goto unm_err_out;
 		}
 		/* Now allocate memory for the attribute list. */
-		ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr);
+		ni->attr_list_size = (u32)ntfs_attr_size(a);
 		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
 		if (!ni->attr_list) {
 			ntfs_error(vi->i_sb, "Not enough memory to allocate "
@@ -690,9 +693,9 @@ static int ntfs_read_locked_inode(struct inode *vi)
 			err = -ENOMEM;
 			goto unm_err_out;
 		}
-		if (ctx->attr->non_resident) {
+		if (a->non_resident) {
 			NInoSetAttrListNonResident(ni);
-			if (ctx->attr->data.non_resident.lowest_vcn) {
+			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(vi->i_sb, "Attribute list has non "
 						"zero lowest_vcn.");
 				goto unm_err_out;
@@ -702,7 +705,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
 			 * exclusive access to the inode at this time.
 			 */
 			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
-					ctx->attr, NULL);
+					a, NULL);
 			if (IS_ERR(ni->attr_list_rl.rl)) {
 				err = PTR_ERR(ni->attr_list_rl.rl);
 				ni->attr_list_rl.rl = NULL;
@@ -713,27 +716,26 @@ static int ntfs_read_locked_inode(struct inode *vi)
 			/* Now load the attribute list. */
 			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
 					ni->attr_list, ni->attr_list_size,
-					sle64_to_cpu(ctx->attr->data.
-					non_resident.initialized_size)))) {
+					sle64_to_cpu(a->data.non_resident.
+					initialized_size)))) {
 				ntfs_error(vi->i_sb, "Failed to load "
 						"attribute list attribute.");
 				goto unm_err_out;
 			}
-		} else /* if (!ctx.attr->non_resident) */ {
-			if ((u8*)ctx->attr + le16_to_cpu(
-					ctx->attr->data.resident.value_offset) +
-					le32_to_cpu(
-					ctx->attr->data.resident.value_length) >
+		} else /* if (!a->non_resident) */ {
+			if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
+					+ le32_to_cpu(
+					a->data.resident.value_length) >
 					(u8*)ctx->mrec + vol->mft_record_size) {
 				ntfs_error(vi->i_sb, "Corrupt attribute list "
 						"in inode.");
 				goto unm_err_out;
 			}
 			/* Now copy the attribute list. */
-			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
-					ctx->attr->data.resident.value_offset),
+			memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
+					a->data.resident.value_offset),
 					le32_to_cpu(
-					ctx->attr->data.resident.value_length));
+					a->data.resident.value_length));
 		}
 	}
 skip_attr_list_load:
@@ -746,7 +748,7 @@ skip_attr_list_load:
 		struct inode *bvi;
 		ntfs_inode *bni;
 		INDEX_ROOT *ir;
-		char *ir_end, *index_end;
+		u8 *ir_end, *index_end;
 
 		/* It is a directory, find index root attribute. */
 		ntfs_attr_reinit_search_ctx(ctx);
@@ -762,17 +764,16 @@ skip_attr_list_load:
 			}
 			goto unm_err_out;
 		}
+		a = ctx->attr;
 		/* Set up the state. */
-		if (unlikely(ctx->attr->non_resident)) {
+		if (unlikely(a->non_resident)) {
 			ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
 					"resident.");
 			goto unm_err_out;
 		}
 		/* Ensure the attribute name is placed before the value. */
-		if (unlikely(ctx->attr->name_length &&
-				(le16_to_cpu(ctx->attr->name_offset) >=
-				le16_to_cpu(ctx->attr->data.resident.
-				value_offset)))) {
+		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+				le16_to_cpu(a->data.resident.value_offset)))) {
 			ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
 					"placed after the attribute value.");
 			goto unm_err_out;
@@ -783,28 +784,27 @@ skip_attr_list_load:
 		 * encrypted. However index root cannot be both compressed and
 		 * encrypted.
 		 */
-		if (ctx->attr->flags & ATTR_COMPRESSION_MASK)
+		if (a->flags & ATTR_COMPRESSION_MASK)
 			NInoSetCompressed(ni);
-		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
-			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+		if (a->flags & ATTR_IS_ENCRYPTED) {
+			if (a->flags & ATTR_COMPRESSION_MASK) {
 				ntfs_error(vi->i_sb, "Found encrypted and "
 						"compressed attribute.");
 				goto unm_err_out;
 			}
 			NInoSetEncrypted(ni);
 		}
-		if (ctx->attr->flags & ATTR_IS_SPARSE)
+		if (a->flags & ATTR_IS_SPARSE)
 			NInoSetSparse(ni);
-		ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu(
-				ctx->attr->data.resident.value_offset));
-		ir_end = (char*)ir + le32_to_cpu(
-				ctx->attr->data.resident.value_length);
-		if (ir_end > (char*)ctx->mrec + vol->mft_record_size) {
+		ir = (INDEX_ROOT*)((u8*)a +
+				le16_to_cpu(a->data.resident.value_offset));
+		ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
+		if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
 			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
 					"corrupt.");
 			goto unm_err_out;
 		}
-		index_end = (char*)&ir->index +
+		index_end = (u8*)&ir->index +
 				le32_to_cpu(ir->index.index_length);
 		if (index_end > ir_end) {
 			ntfs_error(vi->i_sb, "Directory index is corrupt.");
@@ -891,7 +891,8 @@ skip_attr_list_load:
 						"attribute.");
 			goto unm_err_out;
 		}
-		if (!ctx->attr->non_resident) {
+		a = ctx->attr;
+		if (!a->non_resident) {
 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
 					"is resident.");
 			goto unm_err_out;
@@ -900,42 +901,40 @@ skip_attr_list_load:
 		 * Ensure the attribute name is placed before the mapping pairs
 		 * array.
 		 */
-		if (unlikely(ctx->attr->name_length &&
-				(le16_to_cpu(ctx->attr->name_offset) >=
-				le16_to_cpu(ctx->attr->data.non_resident.
-				mapping_pairs_offset)))) {
+		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+				le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset)))) {
 			ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
 					"is placed after the mapping pairs "
 					"array.");
 			goto unm_err_out;
 		}
-		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
+		if (a->flags & ATTR_IS_ENCRYPTED) {
 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
 					"is encrypted.");
 			goto unm_err_out;
 		}
-		if (ctx->attr->flags & ATTR_IS_SPARSE) {
+		if (a->flags & ATTR_IS_SPARSE) {
 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
 					"is sparse.");
 			goto unm_err_out;
 		}
-		if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+		if (a->flags & ATTR_COMPRESSION_MASK) {
 			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
 					"is compressed.");
 			goto unm_err_out;
 		}
-		if (ctx->attr->data.non_resident.lowest_vcn) {
+		if (a->data.non_resident.lowest_vcn) {
 			ntfs_error(vi->i_sb, "First extent of "
 					"$INDEX_ALLOCATION attribute has non "
 					"zero lowest_vcn.");
 			goto unm_err_out;
 		}
-		vi->i_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.data_size);
+		vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
 		ni->initialized_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.initialized_size);
+				a->data.non_resident.initialized_size);
 		ni->allocated_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.allocated_size);
+				a->data.non_resident.allocated_size);
 		/*
 		 * We are done with the mft record, so we release it. Otherwise
 		 * we would deadlock in ntfs_attr_iget().
@@ -1013,10 +1012,11 @@ skip_large_dir_stuff:
 			ntfs_error(vi->i_sb, "$DATA attribute is missing.");
 			goto unm_err_out;
 		}
+		a = ctx->attr;
 		/* Setup the state. */
-		if (ctx->attr->non_resident) {
+		if (a->non_resident) {
 			NInoSetNonResident(ni);
-			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+			if (a->flags & ATTR_COMPRESSION_MASK) {
 				NInoSetCompressed(ni);
 				if (vol->cluster_size > 4096) {
 					ntfs_error(vi->i_sb, "Found "
@@ -1026,7 +1026,7 @@ skip_large_dir_stuff:
 						vol->cluster_size);
 					goto unm_err_out;
 				}
-				if ((ctx->attr->flags & ATTR_COMPRESSION_MASK)
+				if ((a->flags & ATTR_COMPRESSION_MASK)
 						!= ATTR_IS_COMPRESSED) {
 					ntfs_error(vi->i_sb, "Found "
 						"unknown compression method or "
@@ -1034,37 +1034,37 @@ skip_large_dir_stuff:
 					goto unm_err_out;
 				}
 				ni->itype.compressed.block_clusters = 1U <<
-						ctx->attr->data.non_resident.
+						a->data.non_resident.
 						compression_unit;
-				if (ctx->attr->data.non_resident.
-						compression_unit != 4) {
+				if (a->data.non_resident.compression_unit !=
+						4) {
 					ntfs_error(vi->i_sb, "Found "
 						"nonstandard compression unit "
 						"(%u instead of 4).  Cannot "
 						"handle this.",
-						ctx->attr->data.non_resident.
+						a->data.non_resident.
 						compression_unit);
 					err = -EOPNOTSUPP;
 					goto unm_err_out;
 				}
 				ni->itype.compressed.block_size = 1U << (
-						ctx->attr->data.non_resident.
+						a->data.non_resident.
 						compression_unit +
 						vol->cluster_size_bits);
 				ni->itype.compressed.block_size_bits = ffs(
 					ni->itype.compressed.block_size) - 1;
 			}
-			if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
-				if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+			if (a->flags & ATTR_IS_ENCRYPTED) {
+				if (a->flags & ATTR_COMPRESSION_MASK) {
 					ntfs_error(vi->i_sb, "Found encrypted "
 							"and compressed data.");
 					goto unm_err_out;
 				}
 				NInoSetEncrypted(ni);
 			}
-			if (ctx->attr->flags & ATTR_IS_SPARSE)
+			if (a->flags & ATTR_IS_SPARSE)
 				NInoSetSparse(ni);
-			if (ctx->attr->data.non_resident.lowest_vcn) {
+			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(vi->i_sb, "First extent of $DATA "
 						"attribute has non zero "
 						"lowest_vcn.");
@@ -1072,28 +1072,28 @@ skip_large_dir_stuff:
 			}
 			/* Setup all the sizes. */
 			vi->i_size = sle64_to_cpu(
-					ctx->attr->data.non_resident.data_size);
+					a->data.non_resident.data_size);
 			ni->initialized_size = sle64_to_cpu(
-					ctx->attr->data.non_resident.
-					initialized_size);
+					a->data.non_resident.initialized_size);
 			ni->allocated_size = sle64_to_cpu(
-					ctx->attr->data.non_resident.
-					allocated_size);
-			if (NInoCompressed(ni)) {
+					a->data.non_resident.allocated_size);
+			if (NInoCompressed(ni))
 				ni->itype.compressed.size = sle64_to_cpu(
-						ctx->attr->data.non_resident.
+						a->data.non_resident.
 						compressed_size);
-			}
 		} else { /* Resident attribute. */
-			/*
-			 * Make all sizes equal for simplicity in read code
-			 * paths. FIXME: Need to keep this in mind when
-			 * converting to non-resident attribute in write code
-			 * path. (Probably only affects truncate().)
-			 */
-			vi->i_size = ni->initialized_size = ni->allocated_size =
-					le32_to_cpu(
-					ctx->attr->data.resident.value_length);
+			/* Setup all the sizes. */
+			vi->i_size = ni->initialized_size = le32_to_cpu(
+					a->data.resident.value_length);
+			ni->allocated_size = le32_to_cpu(a->length) -
+					le16_to_cpu(
+					a->data.resident.value_offset);
+			if (vi->i_size > ni->allocated_size) {
+				ntfs_error(vi->i_sb, "Resident data attribute "
+						"is corrupt (size exceeds "
+						"allocation).");
+				goto unm_err_out;
+			}
 		}
 no_data_attr_special_case:
 		/* We are done with the mft record, so we release it. */
@@ -1169,6 +1169,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 	ntfs_volume *vol = NTFS_SB(vi->i_sb);
 	ntfs_inode *ni, *base_ni;
 	MFT_RECORD *m;
+	ATTR_RECORD *a;
 	ntfs_attr_search_ctx *ctx;
 	int err = 0;
 
@@ -1203,24 +1204,21 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 		err = -ENOMEM;
 		goto unm_err_out;
 	}
-
 	/* Find the attribute. */
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, 0, NULL, 0, ctx);
 	if (unlikely(err))
 		goto unm_err_out;
-
-	if (!ctx->attr->non_resident) {
+	a = ctx->attr;
+	if (!a->non_resident) {
 		/* Ensure the attribute name is placed before the value. */
-		if (unlikely(ctx->attr->name_length &&
-				(le16_to_cpu(ctx->attr->name_offset) >=
-				le16_to_cpu(ctx->attr->data.resident.
-				value_offset)))) {
+		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+				le16_to_cpu(a->data.resident.value_offset)))) {
 			ntfs_error(vol->sb, "Attribute name is placed after "
 					"the attribute value.");
 			goto unm_err_out;
 		}
-		if (NInoMstProtected(ni) || ctx->attr->flags) {
+		if (NInoMstProtected(ni) || a->flags) {
 			ntfs_error(vi->i_sb, "Found mst protected attribute "
 					"or attribute with non-zero flags but "
 					"the attribute is resident.  Please "
@@ -1228,27 +1226,30 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"linux-ntfs-dev@lists.sourceforge.net");
 			goto unm_err_out;
 		}
-		/*
-		 * Resident attribute. Make all sizes equal for simplicity in
-		 * read code paths.
-		 */
-		vi->i_size = ni->initialized_size = ni->allocated_size =
-			le32_to_cpu(ctx->attr->data.resident.value_length);
+		/* Resident attribute.  Setup all the sizes. */
+		vi->i_size = ni->initialized_size = le32_to_cpu(
+				a->data.resident.value_length);
+		ni->allocated_size = le32_to_cpu(a->length) -
+				le16_to_cpu(a->data.resident.value_offset);
+		if (vi->i_size > ni->allocated_size) {
+			ntfs_error(vi->i_sb, "Resident data attribute is "
+					"corrupt (size exceeds allocation).");
+			goto unm_err_out;
+		}
 	} else {
 		NInoSetNonResident(ni);
 		/*
 		 * Ensure the attribute name is placed before the mapping pairs
 		 * array.
 		 */
-		if (unlikely(ctx->attr->name_length &&
-				(le16_to_cpu(ctx->attr->name_offset) >=
-				le16_to_cpu(ctx->attr->data.non_resident.
-				mapping_pairs_offset)))) {
+		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+				le16_to_cpu(
+				a->data.non_resident.mapping_pairs_offset)))) {
 			ntfs_error(vol->sb, "Attribute name is placed after "
 					"the mapping pairs array.");
 			goto unm_err_out;
 		}
-		if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+		if (a->flags & ATTR_COMPRESSION_MASK) {
 			if (NInoMstProtected(ni)) {
 				ntfs_error(vi->i_sb, "Found mst protected "
 						"attribute but the attribute "
@@ -1277,36 +1278,33 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 						vol->cluster_size);
 				goto unm_err_out;
 			}
-			if ((ctx->attr->flags & ATTR_COMPRESSION_MASK)
-					!= ATTR_IS_COMPRESSED) {
+			if ((a->flags & ATTR_COMPRESSION_MASK) !=
+					ATTR_IS_COMPRESSED) {
 				ntfs_error(vi->i_sb, "Found unknown "
 						"compression method.");
 				goto unm_err_out;
 			}
 			ni->itype.compressed.block_clusters = 1U <<
-					ctx->attr->data.non_resident.
-					compression_unit;
-			if (ctx->attr->data.non_resident.compression_unit !=
-					4) {
+					a->data.non_resident.compression_unit;
+			if (a->data.non_resident.compression_unit != 4) {
 				ntfs_error(vi->i_sb, "Found nonstandard "
 						"compression unit (%u instead "
 						"of 4).  Cannot handle this.",
-						ctx->attr->data.non_resident.
+						a->data.non_resident.
 						compression_unit);
 				err = -EOPNOTSUPP;
 				goto unm_err_out;
 			}
 			ni->itype.compressed.block_size = 1U << (
-					ctx->attr->data.non_resident.
-					compression_unit +
+					a->data.non_resident.compression_unit +
 					vol->cluster_size_bits);
 			ni->itype.compressed.block_size_bits = ffs(
-				ni->itype.compressed.block_size) - 1;
+					ni->itype.compressed.block_size) - 1;
 		}
-		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
-			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
-				ntfs_error(vi->i_sb, "Found encrypted "
-						"and compressed data.");
+		if (a->flags & ATTR_IS_ENCRYPTED) {
+			if (a->flags & ATTR_COMPRESSION_MASK) {
+				ntfs_error(vi->i_sb, "Found encrypted and "
+						"compressed data.");
 				goto unm_err_out;
 			}
 			if (NInoMstProtected(ni)) {
@@ -1320,7 +1318,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			}
 			NInoSetEncrypted(ni);
 		}
-		if (ctx->attr->flags & ATTR_IS_SPARSE) {
+		if (a->flags & ATTR_IS_SPARSE) {
 			if (NInoMstProtected(ni)) {
 				ntfs_error(vi->i_sb, "Found mst protected "
 						"attribute but the attribute "
@@ -1332,23 +1330,20 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			}
 			NInoSetSparse(ni);
 		}
-		if (ctx->attr->data.non_resident.lowest_vcn) {
+		if (a->data.non_resident.lowest_vcn) {
 			ntfs_error(vi->i_sb, "First extent of attribute has "
 					"non-zero lowest_vcn.");
 			goto unm_err_out;
 		}
 		/* Setup all the sizes. */
-		vi->i_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.data_size);
+		vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
 		ni->initialized_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.initialized_size);
+				a->data.non_resident.initialized_size);
 		ni->allocated_size = sle64_to_cpu(
-				ctx->attr->data.non_resident.allocated_size);
-		if (NInoCompressed(ni)) {
+				a->data.non_resident.allocated_size);
+		if (NInoCompressed(ni))
 			ni->itype.compressed.size = sle64_to_cpu(
-					ctx->attr->data.non_resident.
-					compressed_size);
-		}
+					a->data.non_resident.compressed_size);
 	}
 
 	/* Setup the operations for this attribute inode. */
@@ -1437,6 +1432,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 	ntfs_inode *ni, *base_ni, *bni;
 	struct inode *bvi;
 	MFT_RECORD *m;
+	ATTR_RECORD *a;
 	ntfs_attr_search_ctx *ctx;
 	INDEX_ROOT *ir;
 	u8 *ir_end, *index_end;
@@ -1478,30 +1474,28 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 					"missing.");
 		goto unm_err_out;
 	}
+	a = ctx->attr;
 	/* Set up the state. */
-	if (unlikely(ctx->attr->non_resident)) {
+	if (unlikely(a->non_resident)) {
 		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
 		goto unm_err_out;
 	}
 	/* Ensure the attribute name is placed before the value. */
-	if (unlikely(ctx->attr->name_length &&
-			(le16_to_cpu(ctx->attr->name_offset) >=
-			le16_to_cpu(ctx->attr->data.resident.
-			value_offset)))) {
+	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+			le16_to_cpu(a->data.resident.value_offset)))) {
 		ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
 				"after the attribute value.");
 		goto unm_err_out;
 	}
 	/* Compressed/encrypted/sparse index root is not allowed. */
-	if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
+	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
 			ATTR_IS_SPARSE)) {
 		ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
 				"root attribute.");
 		goto unm_err_out;
 	}
-	ir = (INDEX_ROOT*)((u8*)ctx->attr +
-			le16_to_cpu(ctx->attr->data.resident.value_offset));
-	ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length);
+	ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
+	ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
 	if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
 		ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
 		goto unm_err_out;
@@ -1574,7 +1568,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 					"$INDEX_ALLOCATION attribute.");
 		goto unm_err_out;
 	}
-	if (!ctx->attr->non_resident) {
+	if (!a->non_resident) {
 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
 				"resident.");
 		goto unm_err_out;
@@ -1582,37 +1576,36 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 	/*
 	 * Ensure the attribute name is placed before the mapping pairs array.
 	 */
-	if (unlikely(ctx->attr->name_length && (le16_to_cpu(
-			ctx->attr->name_offset) >= le16_to_cpu(
-			ctx->attr->data.non_resident.mapping_pairs_offset)))) {
+	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
+			le16_to_cpu(
+			a->data.non_resident.mapping_pairs_offset)))) {
 		ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
 				"placed after the mapping pairs array.");
 		goto unm_err_out;
 	}
-	if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
+	if (a->flags & ATTR_IS_ENCRYPTED) {
 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
 				"encrypted.");
 		goto unm_err_out;
 	}
-	if (ctx->attr->flags & ATTR_IS_SPARSE) {
+	if (a->flags & ATTR_IS_SPARSE) {
 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
 		goto unm_err_out;
 	}
-	if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+	if (a->flags & ATTR_COMPRESSION_MASK) {
 		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
 				"compressed.");
 		goto unm_err_out;
 	}
-	if (ctx->attr->data.non_resident.lowest_vcn) {
+	if (a->data.non_resident.lowest_vcn) {
 		ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
 				"attribute has non zero lowest_vcn.");
 		goto unm_err_out;
 	}
-	vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size);
+	vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
 	ni->initialized_size = sle64_to_cpu(
-			ctx->attr->data.non_resident.initialized_size);
-	ni->allocated_size = sle64_to_cpu(
-			ctx->attr->data.non_resident.allocated_size);
+			a->data.non_resident.initialized_size);
+	ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
 	/*
 	 * We are done with the mft record, so we release it.  Otherwise
 	 * we would deadlock in ntfs_attr_iget().
@@ -1716,7 +1709,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 	struct buffer_head *bh;
 	ntfs_inode *ni;
 	MFT_RECORD *m = NULL;
-	ATTR_RECORD *attr;
+	ATTR_RECORD *a;
 	ntfs_attr_search_ctx *ctx;
 	unsigned int i, nr_blocks;
 	int err;
@@ -1813,9 +1806,10 @@ int ntfs_read_inode_mount(struct inode *vi)
 
 		ntfs_debug("Attribute list attribute found in $MFT.");
 		NInoSetAttrList(ni);
-		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
-				ctx->attr->flags & ATTR_COMPRESSION_MASK ||
-				ctx->attr->flags & ATTR_IS_SPARSE) {
+		a = ctx->attr;
+		if (a->flags & ATTR_IS_ENCRYPTED ||
+				a->flags & ATTR_COMPRESSION_MASK ||
+				a->flags & ATTR_IS_SPARSE) {
 			ntfs_error(sb, "Attribute list attribute is "
 					"compressed/encrypted/sparse. Not "
 					"allowed. $MFT is corrupt. You should "
@@ -1823,16 +1817,16 @@ int ntfs_read_inode_mount(struct inode *vi)
 			goto put_err_out;
 		}
 		/* Now allocate memory for the attribute list. */
-		ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr);
+		ni->attr_list_size = (u32)ntfs_attr_size(a);
 		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
 		if (!ni->attr_list) {
 			ntfs_error(sb, "Not enough memory to allocate buffer "
 					"for attribute list.");
 			goto put_err_out;
 		}
-		if (ctx->attr->non_resident) {
+		if (a->non_resident) {
 			NInoSetAttrListNonResident(ni);
-			if (ctx->attr->data.non_resident.lowest_vcn) {
+			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(sb, "Attribute list has non zero "
 						"lowest_vcn. $MFT is corrupt. "
 						"You should run chkdsk.");
@@ -1840,7 +1834,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 			}
 			/* Setup the runlist. */
 			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
-					ctx->attr, NULL);
+					a, NULL);
 			if (IS_ERR(ni->attr_list_rl.rl)) {
 				err = PTR_ERR(ni->attr_list_rl.rl);
 				ni->attr_list_rl.rl = NULL;
@@ -1852,7 +1846,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 			/* Now load the attribute list. */
 			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
 					ni->attr_list, ni->attr_list_size,
-					sle64_to_cpu(ctx->attr->data.
+					sle64_to_cpu(a->data.
 					non_resident.initialized_size)))) {
 				ntfs_error(sb, "Failed to load attribute list "
 						"attribute with error code %i.",
@@ -1860,20 +1854,20 @@ int ntfs_read_inode_mount(struct inode *vi)
 				goto put_err_out;
 			}
 		} else /* if (!ctx.attr->non_resident) */ {
-			if ((u8*)ctx->attr + le16_to_cpu(
-					ctx->attr->data.resident.value_offset) +
+			if ((u8*)a + le16_to_cpu(
+					a->data.resident.value_offset) +
 					le32_to_cpu(
-					ctx->attr->data.resident.value_length) >
+					a->data.resident.value_length) >
 					(u8*)ctx->mrec + vol->mft_record_size) {
 				ntfs_error(sb, "Corrupt attribute list "
 						"attribute.");
 				goto put_err_out;
 			}
 			/* Now copy the attribute list. */
-			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
-					ctx->attr->data.resident.value_offset),
+			memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
+					a->data.resident.value_offset),
 					le32_to_cpu(
-					ctx->attr->data.resident.value_length));
+					a->data.resident.value_length));
 		}
 		/* The attribute list is now setup in memory. */
 		/*
@@ -1939,25 +1933,25 @@ int ntfs_read_inode_mount(struct inode *vi)
 	ntfs_attr_reinit_search_ctx(ctx);
 
 	/* Now load all attribute extents. */
-	attr = NULL;
+	a = NULL;
 	next_vcn = last_vcn = highest_vcn = 0;
 	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
 			ctx))) {
 		runlist_element *nrl;
 
 		/* Cache the current attribute. */
-		attr = ctx->attr;
+		a = ctx->attr;
 		/* $MFT must be non-resident. */
-		if (!attr->non_resident) {
+		if (!a->non_resident) {
 			ntfs_error(sb, "$MFT must be non-resident but a "
 					"resident extent was found. $MFT is "
 					"corrupt. Run chkdsk.");
 			goto put_err_out;
 		}
 		/* $MFT must be uncompressed and unencrypted. */
-		if (attr->flags & ATTR_COMPRESSION_MASK ||
-				attr->flags & ATTR_IS_ENCRYPTED ||
-				attr->flags & ATTR_IS_SPARSE) {
+		if (a->flags & ATTR_COMPRESSION_MASK ||
+				a->flags & ATTR_IS_ENCRYPTED ||
+				a->flags & ATTR_IS_SPARSE) {
 			ntfs_error(sb, "$MFT must be uncompressed, "
 					"non-sparse, and unencrypted but a "
 					"compressed/sparse/encrypted extent "
@@ -1971,7 +1965,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 		 * as we have exclusive access to the inode at this time and we
 		 * are a mount in progress task, too.
 		 */
-		nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl);
+		nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
 		if (IS_ERR(nrl)) {
 			ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
 					"failed with error code %ld.  $MFT is "
@@ -1982,7 +1976,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 
 		/* Are we in the first extent? */
 		if (!next_vcn) {
-			if (attr->data.non_resident.lowest_vcn) {
+			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(sb, "First extent of $DATA "
 						"attribute has non zero "
 						"lowest_vcn. $MFT is corrupt. "
@@ -1991,15 +1985,15 @@ int ntfs_read_inode_mount(struct inode *vi)
 			}
 			/* Get the last vcn in the $DATA attribute. */
 			last_vcn = sle64_to_cpu(
-					attr->data.non_resident.allocated_size)
+					a->data.non_resident.allocated_size)
 					>> vol->cluster_size_bits;
 			/* Fill in the inode size. */
 			vi->i_size = sle64_to_cpu(
-					attr->data.non_resident.data_size);
-			ni->initialized_size = sle64_to_cpu(attr->data.
-					non_resident.initialized_size);
+					a->data.non_resident.data_size);
+			ni->initialized_size = sle64_to_cpu(
+					a->data.non_resident.initialized_size);
 			ni->allocated_size = sle64_to_cpu(
-					attr->data.non_resident.allocated_size);
+					a->data.non_resident.allocated_size);
 			/*
 			 * Verify the number of mft records does not exceed
 			 * 2^32 - 1.
@@ -2056,7 +2050,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 		}
 
 		/* Get the lowest vcn for the next extent. */
-		highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn);
+		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
 		next_vcn = highest_vcn + 1;
 
 		/* Only one extent or error, which we catch below. */
@@ -2065,7 +2059,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 
 		/* Avoid endless loops due to corruption. */
 		if (next_vcn < sle64_to_cpu(
-				attr->data.non_resident.lowest_vcn)) {
+				a->data.non_resident.lowest_vcn)) {
 			ntfs_error(sb, "$MFT has corrupt attribute list "
 					"attribute. Run chkdsk.");
 			goto put_err_out;
@@ -2076,7 +2070,7 @@ int ntfs_read_inode_mount(struct inode *vi)
 				"$MFT is corrupt. Run chkdsk.");
 		goto put_err_out;
 	}
-	if (!attr) {
+	if (!a) {
 		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
 				"corrupt. Run chkdsk.");
 		goto put_err_out;
@@ -2318,6 +2312,7 @@ int ntfs_truncate(struct inode *vi)
 	ntfs_volume *vol = ni->vol;
 	ntfs_attr_search_ctx *ctx;
 	MFT_RECORD *m;
+	ATTR_RECORD *a;
 	const char *te = "  Leaving file length out of sync with i_size.";
 	int err;
 
@@ -2354,14 +2349,15 @@ int ntfs_truncate(struct inode *vi)
 					vi->i_ino, err);
 		goto err_out;
 	}
+	a = ctx->attr;
 	/* If the size has not changed there is nothing to do. */
-	if (ntfs_attr_size(ctx->attr) == i_size_read(vi))
+	if (ntfs_attr_size(a) == i_size_read(vi))
 		goto done;
 	// TODO: Implement the truncate...
 	ntfs_error(vi->i_sb, "Inode size has changed but this is not "
 			"implemented yet.  Resetting inode size to old value. "
 			" This is most likely a bug in the ntfs driver!");
-	i_size_write(vi, ntfs_attr_size(ctx->attr)); 
+	i_size_write(vi, ntfs_attr_size(a)); 
 done:
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(ni);

From 8907547d4b099e67762ea4891c127ea1f6dd1cb7 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rddunlap@osdl.org>
Date: Thu, 3 Mar 2005 11:19:53 +0000
Subject: [PATCH 020/113] NTFS: Fix printk format warnings on ia64. (Randy
 Dunlap)

Signed-off-by: Randy Dunlap <rddunlap@osdl.org>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  1 +
 fs/ntfs/aops.c     |  3 ++-
 fs/ntfs/debug.c    | 15 +++++++++------
 fs/ntfs/inode.c    | 12 ++++++------
 fs/ntfs/lcnalloc.c |  2 +-
 fs/ntfs/mft.c      |  2 +-
 6 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index ad2c4e88f053..bfd86c5cc480 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -29,6 +29,7 @@ ToDo/Notes:
 	  compiled without debug.  This avoids a possible denial of service
 	  attack.  Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
 	  out.
+	- Fix compilation warnings on ia64.  (Randy Dunlap)
 	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
 	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
 	- Use i_size_read() once and then use the cached value in
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 2b4b8b9e8796..2a7cba258cca 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -965,7 +965,8 @@ lock_retry_remap:
 						"attribute type 0x%x) because "
 						"its location on disk could "
 						"not be determined (error "
-						"code %lli).", (s64)block <<
+						"code %lli).",
+						(long long)block <<
 						bh_size_bits >>
 						vol->mft_record_size_bits,
 						ni->mft_no, ni->type,
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 6fb6bb5e3723..807150e2c2b9 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -164,14 +164,17 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
 			if (index > -LCN_ENOENT - 1)
 				index = 3;
 			printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
-					(rl + i)->vcn, lcn_str[index],
-					(rl + i)->length, (rl + i)->length ?
-					"" : " (runlist end)");
+					(long long)(rl + i)->vcn, lcn_str[index],
+					(long long)(rl + i)->length,
+					(rl + i)->length ? "" :
+						" (runlist end)");
 		} else
 			printk(KERN_DEBUG "%-16Lx %-16Lx  %-16Lx%s\n",
-					(rl + i)->vcn, (rl + i)->lcn,
-					(rl + i)->length, (rl + i)->length ?
-					"" : " (runlist end)");
+					(long long)(rl + i)->vcn,
+					(long long)(rl + i)->lcn,
+					(long long)(rl + i)->length,
+					(rl + i)->length ? "" :
+						" (runlist end)");
 		if (!(rl + i)->length)
 			break;
 	}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index cfca17384115..7ae647c640bb 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2518,18 +2518,18 @@ int ntfs_write_inode(struct inode *vi, int sync)
 	nt = utc2ntfs(vi->i_mtime);
 	if (si->last_data_change_time != nt) {
 		ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
-				"new = 0x%llx", vi->i_ino,
+				"new = 0x%llx", vi->i_ino, (long long)
 				sle64_to_cpu(si->last_data_change_time),
-				sle64_to_cpu(nt));
+				(long long)sle64_to_cpu(nt));
 		si->last_data_change_time = nt;
 		modified = TRUE;
 	}
 	nt = utc2ntfs(vi->i_ctime);
 	if (si->last_mft_change_time != nt) {
 		ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
-				"new = 0x%llx", vi->i_ino,
+				"new = 0x%llx", vi->i_ino, (long long)
 				sle64_to_cpu(si->last_mft_change_time),
-				sle64_to_cpu(nt));
+				(long long)sle64_to_cpu(nt));
 		si->last_mft_change_time = nt;
 		modified = TRUE;
 	}
@@ -2537,8 +2537,8 @@ int ntfs_write_inode(struct inode *vi, int sync)
 	if (si->last_access_time != nt) {
 		ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
 				"new = 0x%llx", vi->i_ino,
-				sle64_to_cpu(si->last_access_time),
-				sle64_to_cpu(nt));
+				(long long)sle64_to_cpu(si->last_access_time),
+				(long long)sle64_to_cpu(nt));
 		si->last_access_time = nt;
 		modified = TRUE;
 	}
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 8db4492b139c..f2b7f8582c59 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -763,7 +763,7 @@ out:
 					"could allocate up to 0x%llx "
 					"clusters.",
 					(unsigned long long)rl[0].lcn,
-					(unsigned long long)count - clusters);
+					(unsigned long long)(count - clusters));
 		/* Deallocate all allocated clusters. */
 		ntfs_debug("Attempting rollback...");
 		err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 0975d738834c..66ef6e275a48 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1802,7 +1802,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		return PTR_ERR(rl);
 	}
 	mft_ni->runlist.rl = rl;
-	ntfs_debug("Allocated %lli clusters.", nr);
+	ntfs_debug("Allocated %lli clusters.", (long long)nr);
 	/* Find the last run in the new runlist. */
 	for (; rl[1].length; rl++)
 		;

From 413826868fb49d200b741bcaeaf58ea5c5e45321 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Mar 2005 13:44:15 +0000
Subject: [PATCH 021/113] NTFS: Make fs/ntfs/namei.c::ntfs_get_{parent,dentry}
 static and move the       definition of ntfs_export_ops from fs/ntfs/super.c
 to namei.c.       Also, declare ntfs_export_ops in fs/ntfs/ntfs.h.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/namei.c   | 31 +++++++++++++++++++++++++++++--
 fs/ntfs/ntfs.h    |  2 ++
 fs/ntfs/super.c   | 34 ----------------------------------
 4 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index bfd86c5cc480..cf21e0571b68 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -81,6 +81,9 @@ ToDo/Notes:
 	  mft record for resident attributes (fs/ntfs/inode.c).
 	- Small readability cleanup to use "a" instead of "ctx->attr"
 	  everywhere (fs/ntfs/inode.c).
+	- Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the
+	  definition of ntfs_export_ops from fs/ntfs/super.c to namei.c.  Also,
+	  declare ntfs_export_ops in fs/ntfs/ntfs.h.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 7c7e13b43b2e..d261e25bd9ba 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -380,7 +380,7 @@ struct inode_operations ntfs_dir_inode_ops = {
  * Return the dentry of the parent directory on success or the error code on
  * error (IS_ERR() is true).
  */
-struct dentry *ntfs_get_parent(struct dentry *child_dent)
+static struct dentry *ntfs_get_parent(struct dentry *child_dent)
 {
 	struct inode *vi = child_dent->d_inode;
 	ntfs_inode *ni = NTFS_I(vi);
@@ -465,7 +465,7 @@ try_next:
  *
  * Return the dentry on success or the error code on error (IS_ERR() is true).
  */
-struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
+static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
 {
 	struct inode *vi;
 	struct dentry *dent;
@@ -496,3 +496,30 @@ struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
 	ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen);
 	return dent;
 }
+
+/**
+ * Export operations allowing NFS exporting of mounted NTFS partitions.
+ *
+ * We use the default ->decode_fh() and ->encode_fh() for now.  Note that they
+ * use 32 bits to store the inode number which is an unsigned long so on 64-bit
+ * architectures is usually 64 bits so it would all fail horribly on huge
+ * volumes.  I guess we need to define our own encode and decode fh functions
+ * that store 64-bit inode numbers at some point but for now we will ignore the
+ * problem...
+ *
+ * We also use the default ->get_name() helper (used by ->decode_fh() via
+ * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
+ * independent.
+ *
+ * The default ->get_parent() just returns -EACCES so we have to provide our
+ * own and the default ->get_dentry() is incompatible with NTFS due to not
+ * allowing the inode number 0 which is used in NTFS for the system file $MFT
+ * and due to using iget() whereas NTFS needs ntfs_iget().
+ */
+struct export_operations ntfs_export_ops = {
+	.get_parent	= ntfs_get_parent,	/* Find the parent of a given
+						   directory. */
+	.get_dentry	= ntfs_get_dentry,	/* Find a dentry for the inode
+						   given a file handle
+						   sub-fragment. */
+};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 720ffb71bab8..a0bf706797bb 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -65,6 +65,8 @@ extern struct inode_operations ntfs_dir_inode_ops;
 extern struct  file_operations ntfs_empty_file_ops;
 extern struct inode_operations ntfs_empty_inode_ops;
 
+extern struct export_operations ntfs_export_ops;
+
 /**
  * NTFS_SB - return the ntfs volume given a vfs super block
  * @sb:		VFS super block
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index b676cf6fd93c..78416eb2a168 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2250,40 +2250,6 @@ static struct super_operations ntfs_sops = {
 						   proc. */
 };
 
-
-/**
- * Declarations for NTFS specific export operations (fs/ntfs/namei.c).
- */
-extern struct dentry *ntfs_get_parent(struct dentry *child_dent);
-extern struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh);
-
-/**
- * Export operations allowing NFS exporting of mounted NTFS partitions.
- *
- * We use the default ->decode_fh() and ->encode_fh() for now.  Note that they
- * use 32 bits to store the inode number which is an unsigned long so on 64-bit
- * architectures is usually 64 bits so it would all fail horribly on huge
- * volumes.  I guess we need to define our own encode and decode fh functions
- * that store 64-bit inode numbers at some point but for now we will ignore the
- * problem...
- *
- * We also use the default ->get_name() helper (used by ->decode_fh() via
- * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
- * independent.
- *
- * The default ->get_parent() just returns -EACCES so we have to provide our
- * own and the default ->get_dentry() is incompatible with NTFS due to not
- * allowing the inode number 0 which is used in NTFS for the system file $MFT
- * and due to using iget() whereas NTFS needs ntfs_iget().
- */
-static struct export_operations ntfs_export_ops = {
-	.get_parent	= ntfs_get_parent,	/* Find the parent of a given
-						   directory. */
-	.get_dentry	= ntfs_get_dentry,	/* Find a dentry for the inode
-						   given a file handle
-						   sub-fragment. */
-};
-
 /**
  * ntfs_fill_super - mount an ntfs filesystem
  * @sb:		super block of ntfs filesystem to mount

From 9451f8519c5e6d5d064c30033fc3d4ce77de321c Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Mar 2005 14:43:43 +0000
Subject: [PATCH 022/113] NTFS: Correct sparse file handling.  The compressed
 values need to be       checked and set in the ntfs inode as done for
 compressed files       and the compressed size needs to be used for vfs
 inode->i_blocks       instead of the allocated size, again, as done for
 compressed files.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   4 ++
 fs/ntfs/inode.c   | 172 ++++++++++++++++++++++------------------------
 fs/ntfs/layout.h  |  15 ++--
 3 files changed, 94 insertions(+), 97 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index cf21e0571b68..cd348f0b90e5 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -84,6 +84,10 @@ ToDo/Notes:
 	- Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the
 	  definition of ntfs_export_ops from fs/ntfs/super.c to namei.c.  Also,
 	  declare ntfs_export_ops in fs/ntfs/ntfs.h.
+	- Correct sparse file handling.  The compressed values need to be
+	  checked and set in the ntfs inode as done for compressed files and
+	  the compressed size needs to be used for vfs inode->i_blocks instead
+	  of the allocated size, again, as done for compressed files.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 7ae647c640bb..f7bee8d014d9 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1016,26 +1016,31 @@ skip_large_dir_stuff:
 		/* Setup the state. */
 		if (a->non_resident) {
 			NInoSetNonResident(ni);
-			if (a->flags & ATTR_COMPRESSION_MASK) {
-				NInoSetCompressed(ni);
-				if (vol->cluster_size > 4096) {
-					ntfs_error(vi->i_sb, "Found "
-						"compressed data but "
-						"compression is disabled due "
-						"to cluster size (%i) > 4kiB.",
-						vol->cluster_size);
-					goto unm_err_out;
+			if (a->flags & (ATTR_COMPRESSION_MASK |
+					ATTR_IS_SPARSE)) {
+				if (a->flags & ATTR_COMPRESSION_MASK) {
+					NInoSetCompressed(ni);
+					if (vol->cluster_size > 4096) {
+						ntfs_error(vi->i_sb, "Found "
+							"compressed data but "
+							"compression is "
+							"disabled due to "
+							"cluster size (%i) > "
+							"4kiB.",
+							vol->cluster_size);
+						goto unm_err_out;
+					}
+					if ((a->flags & ATTR_COMPRESSION_MASK)
+							!= ATTR_IS_COMPRESSED) {
+						ntfs_error(vi->i_sb, "Found "
+							"unknown compression "
+							"method or corrupt "
+							"file.");
+						goto unm_err_out;
+					}
 				}
-				if ((a->flags & ATTR_COMPRESSION_MASK)
-						!= ATTR_IS_COMPRESSED) {
-					ntfs_error(vi->i_sb, "Found "
-						"unknown compression method or "
-						"corrupt file.");
-					goto unm_err_out;
-				}
-				ni->itype.compressed.block_clusters = 1U <<
-						a->data.non_resident.
-						compression_unit;
+				if (a->flags & ATTR_IS_SPARSE)
+					NInoSetSparse(ni);
 				if (a->data.non_resident.compression_unit !=
 						4) {
 					ntfs_error(vi->i_sb, "Found "
@@ -1047,12 +1052,19 @@ skip_large_dir_stuff:
 					err = -EOPNOTSUPP;
 					goto unm_err_out;
 				}
+				ni->itype.compressed.block_clusters = 1U <<
+						a->data.non_resident.
+						compression_unit;
 				ni->itype.compressed.block_size = 1U << (
 						a->data.non_resident.
 						compression_unit +
 						vol->cluster_size_bits);
 				ni->itype.compressed.block_size_bits = ffs(
-					ni->itype.compressed.block_size) - 1;
+						ni->itype.compressed.
+						block_size) - 1;
+				ni->itype.compressed.size = sle64_to_cpu(
+						a->data.non_resident.
+						compressed_size);
 			}
 			if (a->flags & ATTR_IS_ENCRYPTED) {
 				if (a->flags & ATTR_COMPRESSION_MASK) {
@@ -1062,27 +1074,19 @@ skip_large_dir_stuff:
 				}
 				NInoSetEncrypted(ni);
 			}
-			if (a->flags & ATTR_IS_SPARSE)
-				NInoSetSparse(ni);
 			if (a->data.non_resident.lowest_vcn) {
 				ntfs_error(vi->i_sb, "First extent of $DATA "
 						"attribute has non zero "
 						"lowest_vcn.");
 				goto unm_err_out;
 			}
-			/* Setup all the sizes. */
 			vi->i_size = sle64_to_cpu(
 					a->data.non_resident.data_size);
 			ni->initialized_size = sle64_to_cpu(
 					a->data.non_resident.initialized_size);
 			ni->allocated_size = sle64_to_cpu(
 					a->data.non_resident.allocated_size);
-			if (NInoCompressed(ni))
-				ni->itype.compressed.size = sle64_to_cpu(
-						a->data.non_resident.
-						compressed_size);
 		} else { /* Resident attribute. */
-			/* Setup all the sizes. */
 			vi->i_size = ni->initialized_size = le32_to_cpu(
 					a->data.resident.value_length);
 			ni->allocated_size = le32_to_cpu(a->length) -
@@ -1120,11 +1124,10 @@ no_data_attr_special_case:
 	 * sizes of all non-resident attributes present to give us the Linux
 	 * correct size that should go into i_blocks (after division by 512).
 	 */
-	if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni))
-		vi->i_blocks = ni->allocated_size >> 9;
-	else
+	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
 		vi->i_blocks = ni->itype.compressed.size >> 9;
-
+	else
+		vi->i_blocks = ni->allocated_size >> 9;
 	ntfs_debug("Done.");
 	return 0;
 
@@ -1226,14 +1229,13 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"linux-ntfs-dev@lists.sourceforge.net");
 			goto unm_err_out;
 		}
-		/* Resident attribute.  Setup all the sizes. */
 		vi->i_size = ni->initialized_size = le32_to_cpu(
 				a->data.resident.value_length);
 		ni->allocated_size = le32_to_cpu(a->length) -
 				le16_to_cpu(a->data.resident.value_offset);
 		if (vi->i_size > ni->allocated_size) {
-			ntfs_error(vi->i_sb, "Resident data attribute is "
-					"corrupt (size exceeds allocation).");
+			ntfs_error(vi->i_sb, "Resident attribute is corrupt "
+					"(size exceeds allocation).");
 			goto unm_err_out;
 		}
 	} else {
@@ -1249,43 +1251,50 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"the mapping pairs array.");
 			goto unm_err_out;
 		}
-		if (a->flags & ATTR_COMPRESSION_MASK) {
+		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+			if (a->flags & ATTR_COMPRESSION_MASK) {
+				NInoSetCompressed(ni);
+				if ((ni->type != AT_DATA) || (ni->type ==
+						AT_DATA && ni->name_len)) {
+					ntfs_error(vi->i_sb, "Found compressed "
+							"non-data or named "
+							"data attribute.  "
+							"Please report you "
+							"saw this message to "
+							"linux-ntfs-dev@lists."
+							"sourceforge.net");
+					goto unm_err_out;
+				}
+				if (vol->cluster_size > 4096) {
+					ntfs_error(vi->i_sb, "Found compressed "
+							"attribute but "
+							"compression is "
+							"disabled due to "
+							"cluster size (%i) > "
+							"4kiB.",
+							vol->cluster_size);
+					goto unm_err_out;
+				}
+				if ((a->flags & ATTR_COMPRESSION_MASK) !=
+						ATTR_IS_COMPRESSED) {
+					ntfs_error(vi->i_sb, "Found unknown "
+							"compression method.");
+					goto unm_err_out;
+				}
+			}
 			if (NInoMstProtected(ni)) {
 				ntfs_error(vi->i_sb, "Found mst protected "
 						"attribute but the attribute "
-						"is compressed.  Please report "
-						"you saw this message to "
+						"is %s.  Please report you "
+						"saw this message to "
 						"linux-ntfs-dev@lists."
-						"sourceforge.net");
+						"sourceforge.net",
+						NInoCompressed(ni) ?
+						"compressed" : "sparse");
 				goto unm_err_out;
 			}
-			NInoSetCompressed(ni);
-			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
-					ni->name_len)) {
-				ntfs_error(vi->i_sb, "Found compressed "
-						"non-data or named data "
-						"attribute.  Please report "
-						"you saw this message to "
-						"linux-ntfs-dev@lists."
-						"sourceforge.net");
-				goto unm_err_out;
-			}
-			if (vol->cluster_size > 4096) {
-				ntfs_error(vi->i_sb, "Found compressed "
-						"attribute but compression is "
-						"disabled due to cluster size "
-						"(%i) > 4kiB.",
-						vol->cluster_size);
-				goto unm_err_out;
-			}
-			if ((a->flags & ATTR_COMPRESSION_MASK) !=
-					ATTR_IS_COMPRESSED) {
-				ntfs_error(vi->i_sb, "Found unknown "
-						"compression method.");
-				goto unm_err_out;
-			}
-			ni->itype.compressed.block_clusters = 1U <<
-					a->data.non_resident.compression_unit;
+			if (a->flags & ATTR_IS_SPARSE)
+				NInoSetSparse(ni);
 			if (a->data.non_resident.compression_unit != 4) {
 				ntfs_error(vi->i_sb, "Found nonstandard "
 						"compression unit (%u instead "
@@ -1295,11 +1304,15 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 				err = -EOPNOTSUPP;
 				goto unm_err_out;
 			}
+			ni->itype.compressed.block_clusters = 1U <<
+					a->data.non_resident.compression_unit;
 			ni->itype.compressed.block_size = 1U << (
 					a->data.non_resident.compression_unit +
 					vol->cluster_size_bits);
 			ni->itype.compressed.block_size_bits = ffs(
 					ni->itype.compressed.block_size) - 1;
+			ni->itype.compressed.size = sle64_to_cpu(
+					a->data.non_resident.compressed_size);
 		}
 		if (a->flags & ATTR_IS_ENCRYPTED) {
 			if (a->flags & ATTR_COMPRESSION_MASK) {
@@ -1318,34 +1331,17 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			}
 			NInoSetEncrypted(ni);
 		}
-		if (a->flags & ATTR_IS_SPARSE) {
-			if (NInoMstProtected(ni)) {
-				ntfs_error(vi->i_sb, "Found mst protected "
-						"attribute but the attribute "
-						"is sparse.  Please report "
-						"you saw this message to "
-						"linux-ntfs-dev@lists."
-						"sourceforge.net");
-				goto unm_err_out;
-			}
-			NInoSetSparse(ni);
-		}
 		if (a->data.non_resident.lowest_vcn) {
 			ntfs_error(vi->i_sb, "First extent of attribute has "
 					"non-zero lowest_vcn.");
 			goto unm_err_out;
 		}
-		/* Setup all the sizes. */
 		vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
 		ni->initialized_size = sle64_to_cpu(
 				a->data.non_resident.initialized_size);
 		ni->allocated_size = sle64_to_cpu(
 				a->data.non_resident.allocated_size);
-		if (NInoCompressed(ni))
-			ni->itype.compressed.size = sle64_to_cpu(
-					a->data.non_resident.compressed_size);
 	}
-
 	/* Setup the operations for this attribute inode. */
 	vi->i_op = NULL;
 	vi->i_fop = NULL;
@@ -1353,12 +1349,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 		vi->i_mapping->a_ops = &ntfs_mst_aops;
 	else
 		vi->i_mapping->a_ops = &ntfs_aops;
-
-	if (!NInoCompressed(ni))
-		vi->i_blocks = ni->allocated_size >> 9;
-	else
+	if (NInoCompressed(ni) || NInoSparse(ni))
 		vi->i_blocks = ni->itype.compressed.size >> 9;
-
+	else
+		vi->i_blocks = ni->allocated_size >> 9;
 	/*
 	 * Make sure the base inode doesn't go away and attach it to the
 	 * attribute inode.
@@ -1643,7 +1637,6 @@ skip_large_index_stuff:
 	vi->i_fop = NULL;
 	vi->i_mapping->a_ops = &ntfs_mst_aops;
 	vi->i_blocks = ni->allocated_size >> 9;
-
 	/*
 	 * Make sure the base inode doesn't go away and attach it to the
 	 * index inode.
@@ -1728,7 +1721,6 @@ int ntfs_read_inode_mount(struct inode *vi)
 	ni->type = AT_DATA;
 	ni->name = NULL;
 	ni->name_len = 0;
-
 	/*
 	 * This sets up our little cheat allowing us to reuse the async read io
 	 * completion handler for directories.
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 7cb8806d4559..8d1f1326612f 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -749,10 +749,11 @@ typedef struct {
 				record header aligned to 8-byte boundary. */
 /* 34*/			u8 compression_unit; /* The compression unit expressed
 				as the log to the base 2 of the number of
-				clusters in a compression unit. 0 means not
-				compressed. (This effectively limits the
+				clusters in a compression unit.  0 means not
+				compressed.  (This effectively limits the
 				compression unit size to be a power of two
-				clusters.) WinNT4 only uses a value of 4. */
+				clusters.)  WinNT4 only uses a value of 4.
+				Sparse files also have this set to 4. */
 /* 35*/			u8 reserved[5];		/* Align to 8-byte boundary. */
 /* The sizes below are only used when lowest_vcn is zero, as otherwise it would
    be difficult to keep them up-to-date.*/
@@ -772,10 +773,10 @@ typedef struct {
 				data_size. */
 /* sizeof(uncompressed attr) = 64*/
 /* 64*/			sle64 compressed_size;	/* Byte size of the attribute
-				value after compression. Only present when
-				compressed. Always is a multiple of the
-				cluster size. Represents the actual amount of
-				disk space being used on the disk. */
+				value after compression.  Only present when
+				compressed or sparse.  Always is a multiple of
+				the cluster size.  Represents the actual amount
+				of disk space being used on the disk. */
 /* sizeof(compressed attr) = 72*/
 		} __attribute__ ((__packed__)) non_resident;
 	} __attribute__ ((__packed__)) data;

From 7e693073a940c7484c0c21e3e1603e29ce46f30c Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 3 Mar 2005 16:38:59 +0000
Subject: [PATCH 023/113] NTFS: Add AT_EA in addition to AT_DATA to whitelist
 for being allowed to be       non-resident in
 fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 2 ++
 fs/ntfs/attrib.c  | 7 ++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index cd348f0b90e5..868871cb9c6e 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -88,6 +88,8 @@ ToDo/Notes:
 	  checked and set in the ntfs inode as done for compressed files and
 	  the compressed size needs to be used for vfs inode->i_blocks instead
 	  of the allocated size, again, as done for compressed files.
+	- Add AT_EA in addition to AT_DATA to whitelist for being allowed to
+	  be non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 7a16f7ca76d8..fa464fce2261 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1049,10 +1049,11 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
 	ATTR_DEF *ad;
 
 	/*
-	 * $DATA is always allowed to be non-resident even if $AttrDef does not
-	 * specify this in the flags of the $DATA attribute definition record.
+	 * $DATA and $EA are always allowed to be non-resident even if $AttrDef
+	 * does not specify this in the flags of the $DATA attribute definition
+	 * record.
 	 */
-	if (type == AT_DATA)
+	if (type == AT_DATA || type == AT_EA)
 		return 0;
 	/* Find the attribute definition record in $AttrDef. */
 	ad = ntfs_attr_find_in_attrdef(vol, type);

From 271849a98849394ea85fa7caa8a1aaa2b3a849b7 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 7 Mar 2005 21:36:18 +0000
Subject: [PATCH 024/113] NTFS: Add
 fs/ntfs/attrib.[hc]::ntfs_attr_vcn_to_lcn_nolock() used by the new      
 write code.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  6 ++--
 fs/ntfs/attrib.c  | 87 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/ntfs/attrib.h  |  5 ++-
 3 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 868871cb9c6e..4af6ae6ff12b 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -88,8 +88,10 @@ ToDo/Notes:
 	  checked and set in the ntfs inode as done for compressed files and
 	  the compressed size needs to be used for vfs inode->i_blocks instead
 	  of the allocated size, again, as done for compressed files.
-	- Add AT_EA in addition to AT_DATA to whitelist for being allowed to
-	  be non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
+	- Add AT_EA in addition to AT_DATA to whitelist for being allowed to be
+	  non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
+	- Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new
+	  write code.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index fa464fce2261..1610f1cd2862 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -105,6 +105,93 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
 	return err;
 }
 
+/**
+ * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
+ * @ni:			ntfs inode of the attribute whose runlist to search
+ * @vcn:		vcn to convert
+ * @write_locked:	true if the runlist is locked for writing
+ *
+ * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
+ * described by the ntfs inode @ni and return the corresponding logical cluster
+ * number (lcn).
+ *
+ * If the @vcn is not mapped yet, the attempt is made to map the attribute
+ * extent containing the @vcn and the vcn to lcn conversion is retried.
+ *
+ * If @write_locked is true the caller has locked the runlist for writing and
+ * if false for reading.
+ *
+ * Since lcns must be >= 0, we use negative return codes with special meaning:
+ *
+ * Return code	Meaning / Description
+ * ==========================================
+ *  LCN_HOLE	Hole / not allocated on disk.
+ *  LCN_ENOENT	There is no such vcn in the runlist, i.e. @vcn is out of bounds.
+ *  LCN_ENOMEM	Not enough memory to map runlist.
+ *  LCN_EIO	Critical error (runlist/file is corrupt, i/o error, etc).
+ *
+ * Locking: - The runlist must be locked on entry and is left locked on return.
+ *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
+ *	      the lock may be dropped inside the function so you cannot rely on
+ *	      the runlist still being the same when this function returns.
+ */
+LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked)
+{
+	LCN lcn;
+	BOOL is_retry = FALSE;
+
+	ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
+			ni->mft_no, (unsigned long long)vcn,
+			write_locked ? "write" : "read");
+	BUG_ON(!ni);
+	BUG_ON(!NInoNonResident(ni));
+	BUG_ON(vcn < 0);
+retry_remap:
+	/* Convert vcn to lcn.  If that fails map the runlist and retry once. */
+	lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
+	if (likely(lcn >= LCN_HOLE)) {
+		ntfs_debug("Done, lcn 0x%llx.", (long long)lcn);
+		return lcn;
+	}
+	if (lcn != LCN_RL_NOT_MAPPED) {
+		if (lcn != LCN_ENOENT)
+			lcn = LCN_EIO;
+	} else if (!is_retry) {
+		int err;
+
+		if (!write_locked) {
+			up_read(&ni->runlist.lock);
+			down_write(&ni->runlist.lock);
+			if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
+					LCN_RL_NOT_MAPPED)) {
+				up_write(&ni->runlist.lock);
+				down_read(&ni->runlist.lock);
+				goto retry_remap;
+			}
+		}
+		err = ntfs_map_runlist_nolock(ni, vcn);
+		if (!write_locked) {
+			up_write(&ni->runlist.lock);
+			down_read(&ni->runlist.lock);
+		}
+		if (likely(!err)) {
+			is_retry = TRUE;
+			goto retry_remap;
+		}
+		if (err == -ENOENT)
+			lcn = LCN_ENOENT;
+		else if (err == -ENOMEM)
+			lcn = LCN_ENOMEM;
+		else
+			lcn = LCN_EIO;
+	}
+	if (lcn != LCN_ENOENT)
+		ntfs_error(ni->vol->sb, "Failed with error code %lli.",
+				(long long)lcn);
+	return lcn;
+}
+
 /**
  * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
  * @ni:			ntfs inode describing the runlist to search
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 3eb451657025..75041c89c738 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -2,7 +2,7 @@
  * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
  *	      Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -63,6 +63,9 @@ typedef struct {
 extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
 extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
 
+extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
+		const BOOL write_locked);
+
 extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked);
 

From c0c1cc0e46b36347f11b566f99087dc5e6fc1b89 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 7 Mar 2005 21:43:38 +0000
Subject: [PATCH 025/113] NTFS: - Fix bug in
 fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after         dropping the
 read lock and taking the write lock we were not checking         whether
 someone else did not already do the work we wanted to do.       - Rename
 ntfs_find_vcn_nolock() to ntfs_attr_find_vcn_nolock().       - Tidy up some
 comments in fs/ntfs/runlist.c.       - Add LCN_ENOMEM and LCN_EIO definitions
 to fs/ntfs/runlist.h.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  5 +++++
 fs/ntfs/attrib.c   | 29 +++++++++++++++++------------
 fs/ntfs/attrib.h   |  4 ++--
 fs/ntfs/lcnalloc.c |  4 ++--
 fs/ntfs/mft.c      |  6 +++---
 fs/ntfs/runlist.c  | 15 ++++++++-------
 fs/ntfs/runlist.h  |  2 ++
 7 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 4af6ae6ff12b..7507a56a4921 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -92,6 +92,11 @@ ToDo/Notes:
 	  non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
 	- Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new
 	  write code.
+	- Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after
+	  dropping the read lock and taking the write lock we were not checking
+	  whether someone else did not already do the work we wanted to do.
+	- Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
+	  ntfs_attr_find_vcn_nolock() and update all callers.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1610f1cd2862..6de5e04e97a2 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -193,19 +193,19 @@ retry_remap:
 }
 
 /**
- * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
+ * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
  * @ni:			ntfs inode describing the runlist to search
  * @vcn:		vcn to find
  * @write_locked:	true if the runlist is locked for writing
  *
  * Find the virtual cluster number @vcn in the runlist described by the ntfs
  * inode @ni and return the address of the runlist element containing the @vcn.
- * The runlist is left locked and the caller has to unlock it.  In the error
- * case, the runlist is left in the same locking state as on entry.
  *
- * Note if @write_locked is FALSE the lock may be dropped inside the function
- * so you cannot rely on the runlist still being the same when this function
- * returns.
+ * If the @vcn is not mapped yet, the attempt is made to map the attribute
+ * extent containing the @vcn and the vcn to lcn conversion is retried.
+ *
+ * If @write_locked is true the caller has locked the runlist for writing and
+ * if false for reading.
  *
  * Note you need to distinguish between the lcn of the returned runlist element
  * being >= 0 and LCN_HOLE.  In the later case you have to return zeroes on
@@ -221,13 +221,12 @@ retry_remap:
  *	-ENOMEM - Not enough memory to map runlist.
  *	-EIO	- Critical error (runlist/file is corrupt, i/o error, etc).
  *
- * Locking: - The runlist must be unlocked on entry.
- *	    - On failing return, the runlist is unlocked.
- *	    - On successful return, the runlist is locked.  If @need_write us
- *	      true, it is locked for writing.  Otherwise is is locked for
- *	      reading.
+ * Locking: - The runlist must be locked on entry and is left locked on return.
+ *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
+ *	      the lock may be dropped inside the function so you cannot rely on
+ *	      the runlist still being the same when this function returns.
  */
-runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked)
 {
 	runlist_element *rl;
@@ -268,6 +267,12 @@ retry_remap:
 		if (!write_locked) {
 			up_read(&ni->runlist.lock);
 			down_write(&ni->runlist.lock);
+			if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
+					LCN_RL_NOT_MAPPED)) {
+				up_write(&ni->runlist.lock);
+				down_read(&ni->runlist.lock);
+				goto retry_remap;
+			}
 		}
 		err = ntfs_map_runlist_nolock(ni, vcn);
 		if (!write_locked) {
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 75041c89c738..e0a50f1ca76f 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -66,8 +66,8 @@ extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
 extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
 		const BOOL write_locked);
 
-extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
-		const BOOL write_locked);
+extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
+		const VCN vcn, const BOOL write_locked);
 
 int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 		const u32 name_len, const IGNORE_CASE_BOOL ic,
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index f2b7f8582c59..6e584ab743c5 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -850,7 +850,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 
 	/* This returns with ni->runlist locked for reading on success. */
 	down_read(&ni->runlist.lock);
-	rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE);
+	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
 	if (IS_ERR(rl)) {
 		if (!is_rollback)
 			ntfs_error(vol->sb, "Failed to find first runlist "
@@ -904,7 +904,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 
 			/* Attempt to map runlist. */
 			vcn = rl->vcn;
-			rl = ntfs_find_vcn_nolock(ni, vcn, FALSE);
+			rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
 			if (IS_ERR(rl)) {
 				err = PTR_ERR(rl);
 				if (!is_rollback)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 66ef6e275a48..61ce09f1b652 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1297,7 +1297,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	read_lock_irqsave(&mftbmp_ni->size_lock, flags);
 	ll = mftbmp_ni->allocated_size;
 	read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
-	rl = ntfs_find_vcn_nolock(mftbmp_ni,
+	rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
 			(ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mftbmp_ni->runlist.lock);
@@ -1727,8 +1727,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	read_lock_irqsave(&mft_ni->size_lock, flags);
 	ll = mft_ni->allocated_size;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
-	rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits,
-			TRUE);
+	rl = ntfs_attr_find_vcn_nolock(mft_ni,
+			(ll - 1) >> vol->cluster_size_bits, TRUE);
 	if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
 		up_write(&mft_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to determine last allocated "
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 1b344dd4d407..3f479f176610 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -933,17 +933,18 @@ err_out:
  *
  * It is up to the caller to serialize access to the runlist @rl.
  *
- * Since lcns must be >= 0, we use negative return values with special meaning:
+ * Since lcns must be >= 0, we use negative return codes with special meaning:
  *
- * Return value			Meaning / Description
+ * Return code		Meaning / Description
  * ==================================================
- *  -1 = LCN_HOLE		Hole / not allocated on disk.
- *  -2 = LCN_RL_NOT_MAPPED	This is part of the runlist which has not been
- *				inserted into the runlist yet.
- *  -3 = LCN_ENOENT		There is no such vcn in the attribute.
+ *  LCN_HOLE		Hole / not allocated on disk.
+ *  LCN_RL_NOT_MAPPED	This is part of the runlist which has not been
+ *			inserted into the runlist yet.
+ *  LCN_ENOENT		There is no such vcn in the attribute.
  *
  * Locking: - The caller must have locked the runlist (for reading or writing).
- *	    - This function does not touch the lock.
+ *	    - This function does not touch the lock, nor does it modify the
+ *	      runlist.
  */
 LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
 {
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index 7107fde59df9..60c42f3a3fc4 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -66,6 +66,8 @@ typedef enum {
 	LCN_HOLE		= -1,	/* Keep this as highest value or die! */
 	LCN_RL_NOT_MAPPED	= -2,
 	LCN_ENOENT		= -3,
+	LCN_ENOMEM		= -4,
+	LCN_EIO			= -5,
 } LCN_SPECIAL_VALUES;
 
 extern runlist_element *ntfs_runlists_merge(runlist_element *drl,

From 2bfb4fff3e9731ecfe745881e53cfb2e646c47bb Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 9 Mar 2005 15:15:06 +0000
Subject: [PATCH 026/113] NTFS: Add
 fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   1 +
 fs/ntfs/attrib.c  | 300 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/ntfs/attrib.h  |   2 +
 3 files changed, 303 insertions(+)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 7507a56a4921..71680a92b4d6 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -97,6 +97,7 @@ ToDo/Notes:
 	  whether someone else did not already do the work we wanted to do.
 	- Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
 	  ntfs_attr_find_vcn_nolock() and update all callers.
+	- Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 6de5e04e97a2..3b9de4040216 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -25,6 +25,8 @@
 #include "attrib.h"
 #include "debug.h"
 #include "layout.h"
+#include "lcnalloc.h"
+#include "malloc.h"
 #include "mft.h"
 #include "ntfs.h"
 #include "types.h"
@@ -1226,6 +1228,304 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
 	return 0;
 }
 
+/**
+ * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
+ * @ni:		ntfs inode describing the attribute to convert
+ *
+ * Convert the resident ntfs attribute described by the ntfs inode @ni to a
+ * non-resident one.
+ *
+ * Return 0 on success and -errno on error.  The following error return codes
+ * are defined:
+ *	-EPERM	- The attribute is not allowed to be non-resident.
+ *	-ENOMEM	- Not enough memory.
+ *	-ENOSPC	- Not enough disk space.
+ *	-EINVAL	- Attribute not defined on the volume.
+ *	-EIO	- I/o error or other error.
+ *
+ * NOTE to self: No changes in the attribute list are required to move from
+ *		 a resident to a non-resident attribute.
+ *
+ * Locking: - The caller must hold i_sem on the inode.
+ */
+int ntfs_attr_make_non_resident(ntfs_inode *ni)
+{
+	s64 new_size;
+	struct inode *vi = VFS_I(ni);
+	ntfs_volume *vol = ni->vol;
+	ntfs_inode *base_ni;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
+	ntfs_attr_search_ctx *ctx;
+	struct page *page;
+	runlist_element *rl;
+	u8 *kaddr;
+	unsigned long flags;
+	int mp_size, mp_ofs, name_ofs, arec_size, err, err2;
+	u32 attr_size;
+	u8 old_res_attr_flags;
+
+	/* Check that the attribute is allowed to be non-resident. */
+	err = ntfs_attr_can_be_non_resident(vol, ni->type);
+	if (unlikely(err)) {
+		if (err == -EPERM)
+			ntfs_debug("Attribute is not allowed to be "
+					"non-resident.");
+		else
+			ntfs_debug("Attribute not defined on the NTFS "
+					"volume!");
+		return err;
+	}
+	/*
+	 * The size needs to be aligned to a cluster boundary for allocation
+	 * purposes.
+	 */
+	new_size = (i_size_read(vi) + vol->cluster_size - 1) &
+			~(vol->cluster_size - 1);
+	if (new_size > 0) {
+		/*
+		 * Will need the page later and since the page lock nests
+		 * outside all ntfs locks, we need to get the page now.
+		 */
+		page = find_or_create_page(vi->i_mapping, 0,
+				mapping_gfp_mask(vi->i_mapping));
+		if (unlikely(!page))
+			return -ENOMEM;
+		/* Start by allocating clusters to hold the attribute value. */
+		rl = ntfs_cluster_alloc(vol, 0, new_size >>
+				vol->cluster_size_bits, -1, DATA_ZONE);
+		if (IS_ERR(rl)) {
+			err = PTR_ERR(rl);
+			ntfs_debug("Failed to allocate cluster%s, error code "
+					"%i.\n", (new_size >>
+					vol->cluster_size_bits) > 1 ? "s" : "",
+					err);
+			goto page_err_out;
+		}
+	} else {
+		rl = NULL;
+		page = NULL;
+	}
+	/* Determine the size of the mapping pairs array. */
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0);
+	if (unlikely(mp_size < 0)) {
+		err = mp_size;
+		ntfs_debug("Failed to get size for mapping pairs array, error "
+				"code %i.", err);
+		goto rl_err_out;
+	}
+	down_write(&ni->runlist.lock);
+	if (!NInoAttr(ni))
+		base_ni = ni;
+	else
+		base_ni = ni->ext.base_ntfs_ino;
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		m = NULL;
+		ctx = NULL;
+		goto err_out;
+	}
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+			CASE_SENSITIVE, 0, NULL, 0, ctx);
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
+	}
+	m = ctx->mrec;
+	a = ctx->attr;
+	BUG_ON(NInoNonResident(ni));
+	BUG_ON(a->non_resident);
+	/*
+	 * Calculate new offsets for the name and the mapping pairs array.
+	 * We assume the attribute is not compressed or sparse.
+	 */
+	name_ofs = (offsetof(ATTR_REC,
+			data.non_resident.compressed_size) + 7) & ~7;
+	mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
+	/*
+	 * Determine the size of the resident part of the now non-resident
+	 * attribute record.
+	 */
+	arec_size = (mp_ofs + mp_size + 7) & ~7;
+	/*
+	 * If the page is not uptodate bring it uptodate by copying from the
+	 * attribute value.
+	 */
+	attr_size = le32_to_cpu(a->data.resident.value_length);
+	BUG_ON(attr_size != i_size_read(vi));
+	if (page && !PageUptodate(page)) {
+		kaddr = kmap_atomic(page, KM_USER0);
+		memcpy(kaddr, (u8*)a +
+				le16_to_cpu(a->data.resident.value_offset),
+				attr_size);
+		memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+	/* Backup the attribute flag. */
+	old_res_attr_flags = a->data.resident.flags;
+	/* Resize the resident part of the attribute record. */
+	err = ntfs_attr_record_resize(m, a, arec_size);
+	if (unlikely(err))
+		goto err_out;
+	/* Setup the in-memory attribute structure to be non-resident. */
+	NInoSetNonResident(ni);
+	ni->runlist.rl = rl;
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * FIXME: For now just clear all of these as we do not support them
+	 * when writing.
+	 */
+	NInoClearCompressed(ni);
+	NInoClearSparse(ni);
+	NInoClearEncrypted(ni);
+	/*
+	 * Convert the resident part of the attribute record to describe a
+	 * non-resident attribute.
+	 */
+	a->non_resident = 1;
+	/* Move the attribute name if it exists and update the offset. */
+	if (a->name_length)
+		memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
+				a->name_length * sizeof(ntfschar));
+	a->name_offset = cpu_to_le16(name_ofs);
+	/* Update the flags to match the in-memory ones. */
+	a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
+			ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
+	/* Setup the fields specific to non-resident attributes. */
+	a->data.non_resident.lowest_vcn = 0;
+	a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
+			vol->cluster_size_bits);
+	a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
+	a->data.non_resident.compression_unit = 0;
+	memset(&a->data.non_resident.reserved, 0,
+			sizeof(a->data.non_resident.reserved));
+	a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
+	a->data.non_resident.data_size =
+			a->data.non_resident.initialized_size =
+			cpu_to_sle64(attr_size);
+	/* Generate the mapping pairs array into the attribute record. */
+	err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
+			arec_size - mp_ofs, rl, 0, NULL);
+	if (unlikely(err)) {
+		ntfs_debug("Failed to build mapping pairs, error code %i.",
+				err);
+		goto undo_err_out;
+	}
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+	ntfs_attr_put_search_ctx(ctx);
+	unmap_mft_record(base_ni);
+	up_write(&ni->runlist.lock);
+	if (page) {
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	ntfs_debug("Done.");
+	return 0;
+undo_err_out:
+	/* Convert the attribute back into a resident attribute. */
+	a->non_resident = 0;
+	/* Move the attribute name if it exists and update the offset. */
+	name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) +
+			sizeof(a->data.resident.reserved) + 7) & ~7;
+	if (a->name_length)
+		memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
+				a->name_length * sizeof(ntfschar));
+	mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
+	a->name_offset = cpu_to_le16(name_ofs);
+	arec_size = (mp_ofs + attr_size + 7) & ~7;
+	/* Resize the resident part of the attribute record. */
+	err2 = ntfs_attr_record_resize(m, a, arec_size);
+	if (unlikely(err2)) {
+		/*
+		 * This cannot happen (well if memory corruption is at work it
+		 * could happen in theory), but deal with it as well as we can.
+		 * If the old size is too small, truncate the attribute,
+		 * otherwise simply give it a larger allocated size.
+		 * FIXME: Should check whether chkdsk complains when the
+		 * allocated size is much bigger than the resident value size.
+		 */
+		arec_size = le32_to_cpu(a->length);
+		if ((mp_ofs + attr_size) > arec_size) {
+			err2 = attr_size;
+			attr_size = arec_size - mp_ofs;
+			ntfs_error(vol->sb, "Failed to undo partial resident "
+					"to non-resident attribute "
+					"conversion.  Truncating inode 0x%lx, "
+					"attribute type 0x%x from %i bytes to "
+					"%i bytes to maintain metadata "
+					"consistency.  THIS MEANS YOU ARE "
+					"LOSING %i BYTES DATA FROM THIS %s.",
+					vi->i_ino,
+					(unsigned)le32_to_cpu(ni->type),
+					err2, attr_size, err2 - attr_size,
+					((ni->type == AT_DATA) &&
+					!ni->name_len) ? "FILE": "ATTRIBUTE");
+			write_lock_irqsave(&ni->size_lock, flags);
+			ni->initialized_size = attr_size;
+			i_size_write(vi, attr_size);
+			write_unlock_irqrestore(&ni->size_lock, flags);
+		}
+	}
+	/* Setup the fields specific to resident attributes. */
+	a->data.resident.value_length = cpu_to_le32(attr_size);
+	a->data.resident.value_offset = cpu_to_le16(mp_ofs);
+	a->data.resident.flags = old_res_attr_flags;
+	memset(&a->data.resident.reserved, 0,
+			sizeof(a->data.resident.reserved));
+	/* Copy the data from the page back to the attribute value. */
+	if (page) {
+		kaddr = kmap_atomic(page, KM_USER0);
+		memcpy((u8*)a + mp_ofs, kaddr, attr_size);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	/* Finally setup the ntfs inode appropriately. */
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = arec_size - mp_ofs;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	NInoClearNonResident(ni);
+	/* Mark the mft record dirty, so it gets written back. */
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	mark_mft_record_dirty(ctx->ntfs_ino);
+err_out:
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(base_ni);
+	ni->runlist.rl = NULL;
+	up_write(&ni->runlist.lock);
+rl_err_out:
+	if (rl) {
+		if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
+			ntfs_free(rl);
+			ntfs_error(vol->sb, "Failed to release allocated "
+					"cluster(s) in error code path.  Run "
+					"chkdsk to recover the lost "
+					"cluster(s).");
+			NVolSetErrors(vol);
+		}
+page_err_out:
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	if (err == -EINVAL)
+		err = -EIO;
+	return err;
+}
+
 /**
  * ntfs_attr_set - fill (a part of) an attribute with a byte
  * @ni:		ntfs inode describing the attribute to fill
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index e0a50f1ca76f..d73385198336 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -98,6 +98,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
 
 extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
 
+extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
+
 extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
 		const u8 val);
 

From 43b01fda8b17b2b63e7dcdeed11c2ebba56b1fc9 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 9 Mar 2005 15:18:43 +0000
Subject: [PATCH 027/113] NTFS: Fix sign of various error return values to be
 negative in       fs/ntfs/lcnalloc.c.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  |  2 ++
 fs/ntfs/lcnalloc.c | 18 +++++++++---------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 71680a92b4d6..e0b4adf5adce 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -98,6 +98,8 @@ ToDo/Notes:
 	- Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
 	  ntfs_attr_find_vcn_nolock() and update all callers.
 	- Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
+	- Fix sign of various error return values to be negative in
+	  fs/ntfs/lcnalloc.c.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 6e584ab743c5..71bf08730b05 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -1,7 +1,7 @@
 /*
  * lcnalloc.c - Cluster (de)allocation code.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -60,7 +60,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
 		if (rl->lcn < 0)
 			continue;
 		err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
-		if (unlikely(err && (!ret || ret == ENOMEM) && ret != err))
+		if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
 			ret = err;
 	}
 	ntfs_debug("Done.");
@@ -693,7 +693,7 @@ switch_to_data1_zone:		search_zone = 2;
 		if (zone == MFT_ZONE || mft_zone_size <= 0) {
 			ntfs_debug("No free clusters left, going to out.");
 			/* Really no more space left on device. */
-			err = ENOSPC;
+			err = -ENOSPC;
 			goto out;
 		} /* zone == DATA_ZONE && mft_zone_size > 0 */
 		ntfs_debug("Shrinking mft zone.");
@@ -757,9 +757,9 @@ out:
 	if (rl) {
 		int err2;
 
-		if (err == ENOSPC)
+		if (err == -ENOSPC)
 			ntfs_debug("Not enough space to complete allocation, "
-					"err ENOSPC, first free lcn 0x%llx, "
+					"err -ENOSPC, first free lcn 0x%llx, "
 					"could allocate up to 0x%llx "
 					"clusters.",
 					(unsigned long long)rl[0].lcn,
@@ -775,10 +775,10 @@ out:
 		}
 		/* Free the runlist. */
 		ntfs_free(rl);
-	} else if (err == ENOSPC)
-		ntfs_debug("No space left at all, err = ENOSPC, "
-				"first free lcn = 0x%llx.",
-				(unsigned long long)vol->data1_zone_pos);
+	} else if (err == -ENOSPC)
+		ntfs_debug("No space left at all, err = -ENOSPC, first free "
+				"lcn = 0x%llx.",
+				(long long)vol->data1_zone_pos);
 	up_write(&vol->lcnbmp_lock);
 	return ERR_PTR(err);
 }

From 905685f68fc72844b8c2689c39a5c6c35e840152 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 10 Mar 2005 11:06:19 +0000
Subject: [PATCH 028/113] NTFS: - Modify ->readpage and ->writepage
 (fs/ntfs/aops.c) so they detect         and handle the case where an
 attribute is converted from resident         to non-resident by a concurrent
 file write.       - Reorder some operations when converting an attribute from
 resident         to non-resident (fs/ntfs/attrib.c) so it is safe wrt
 concurrent         ->readpage and ->writepage.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/aops.c    | 18 ++++++++++++++++++
 fs/ntfs/attrib.c  | 41 +++++++++++++++++++++++++----------------
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index e0b4adf5adce..b29e0618f358 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -100,6 +100,9 @@ ToDo/Notes:
 	- Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
 	- Fix sign of various error return values to be negative in
 	  fs/ntfs/lcnalloc.c.
+	- Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and
+	  handle the case where an attribute is converted from resident to
+	  non-resident by a concurrent file write.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 2a7cba258cca..6241c4cfbe28 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -355,6 +355,7 @@ static int ntfs_readpage(struct file *file, struct page *page)
 	u32 attr_len;
 	int err = 0;
 
+retry_readpage:
 	BUG_ON(!PageLocked(page));
 	/*
 	 * This can potentially happen because we clear PageUptodate() during
@@ -408,6 +409,14 @@ static int ntfs_readpage(struct file *file, struct page *page)
 		err = PTR_ERR(mrec);
 		goto err_out;
 	}
+	/*
+	 * If a parallel write made the attribute non-resident, drop the mft
+	 * record and retry the readpage.
+	 */
+	if (unlikely(NInoNonResident(ni))) {
+		unmap_mft_record(base_ni);
+		goto retry_readpage;
+	}
 	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
 	if (unlikely(!ctx)) {
 		err = -ENOMEM;
@@ -1248,6 +1257,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 	u32 attr_len;
 	int err;
 
+retry_writepage:
 	BUG_ON(!PageLocked(page));
 	i_size = i_size_read(vi);
 	/* Is the page fully outside i_size? (truncate in progress) */
@@ -1338,6 +1348,14 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
 		ctx = NULL;
 		goto err_out;
 	}
+	/*
+	 * If a parallel write made the attribute non-resident, drop the mft
+	 * record and retry the writepage.
+	 */
+	if (unlikely(NInoNonResident(ni))) {
+		unmap_mft_record(base_ni);
+		goto retry_writepage;
+	}
 	ctx = ntfs_attr_get_search_ctx(base_ni, m);
 	if (unlikely(!ctx)) {
 		err = -ENOMEM;
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 3b9de4040216..41859343a0c8 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1376,19 +1376,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	err = ntfs_attr_record_resize(m, a, arec_size);
 	if (unlikely(err))
 		goto err_out;
-	/* Setup the in-memory attribute structure to be non-resident. */
-	NInoSetNonResident(ni);
-	ni->runlist.rl = rl;
-	write_lock_irqsave(&ni->size_lock, flags);
-	ni->allocated_size = new_size;
-	write_unlock_irqrestore(&ni->size_lock, flags);
-	/*
-	 * FIXME: For now just clear all of these as we do not support them
-	 * when writing.
-	 */
-	NInoClearCompressed(ni);
-	NInoClearSparse(ni);
-	NInoClearEncrypted(ni);
 	/*
 	 * Convert the resident part of the attribute record to describe a
 	 * non-resident attribute.
@@ -1399,7 +1386,10 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
 				a->name_length * sizeof(ntfschar));
 	a->name_offset = cpu_to_le16(name_ofs);
-	/* Update the flags to match the in-memory ones. */
+	/*
+	 * FIXME: For now just clear all of these as we do not support them
+	 * when writing.
+	 */
 	a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
 			ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
 	/* Setup the fields specific to non-resident attributes. */
@@ -1422,6 +1412,25 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 				err);
 		goto undo_err_out;
 	}
+	/* Setup the in-memory attribute structure to be non-resident. */
+	/*
+	 * FIXME: For now just clear all of these as we do not support them
+	 * when writing.
+	 */
+	NInoClearSparse(ni);
+	NInoClearEncrypted(ni);
+	NInoClearCompressed(ni);
+	ni->runlist.rl = rl;
+	write_lock_irqsave(&ni->size_lock, flags);
+	ni->allocated_size = new_size;
+	write_unlock_irqrestore(&ni->size_lock, flags);
+	/*
+	 * This needs to be last since the address space operations ->readpage
+	 * and ->writepage can run concurrently with us as they are not
+	 * serialized on i_sem.  Note, we are not allowed to fail once we flip
+	 * this switch, which is another reason to do this last.
+	 */
+	NInoSetNonResident(ni);
 	/* Mark the mft record dirty, so it gets written back. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1431,6 +1440,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	if (page) {
 		set_page_dirty(page);
 		unlock_page(page);
+		mark_page_accessed(page);
 		page_cache_release(page);
 	}
 	ntfs_debug("Done.");
@@ -1492,11 +1502,10 @@ undo_err_out:
 		memcpy((u8*)a + mp_ofs, kaddr, attr_size);
 		kunmap_atomic(kaddr, KM_USER0);
 	}
-	/* Finally setup the ntfs inode appropriately. */
+	/* Setup the allocated size in the ntfs inode in case it changed. */
 	write_lock_irqsave(&ni->size_lock, flags);
 	ni->allocated_size = arec_size - mp_ofs;
 	write_unlock_irqrestore(&ni->size_lock, flags);
-	NInoClearNonResident(ni);
 	/* Mark the mft record dirty, so it gets written back. */
 	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	mark_mft_record_dirty(ctx->ntfs_ino);

From 1ef334d372d6a7006e20f56f7e85d8f4ec32e3c2 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 4 Apr 2005 14:59:42 +0100
Subject: [PATCH 029/113] NTFS: Include linux/swap.h in fs/ntfs/attrib.c for
 mark_page_accessed().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/attrib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 41859343a0c8..97e6fb047aa0 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/swap.h>
 
 #include "attrib.h"
 #include "debug.h"

From 53d59aad9326199ef5749c97513db498309a057e Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Thu, 17 Mar 2005 10:51:33 +0000
Subject: [PATCH 030/113] NTFS: Fix compilation when configured read-only.

- Add ifdef NTFS_RW around write specific code if fs/ntfs/runlist.[hc] and
  fs/ntfs/attrib.[hc].
- Minor bugfix to fs/ntfs/attrib.c::ntfs_attr_make_non_resident() where the
  runlist was not freed in all error cases.
- Add fs/ntfs/runlist.[hc]::ntfs_rl_find_vcn_nolock().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/attrib.c   | 13 ++++++++++++-
 fs/ntfs/attrib.h   |  4 ++++
 fs/ntfs/lcnalloc.c |  1 -
 fs/ntfs/runlist.c  | 35 +++++++++++++++++++++++++++++++++++
 fs/ntfs/runlist.h  |  9 ++++++++-
 5 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 97e6fb047aa0..1b95f39234a3 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1057,6 +1057,8 @@ void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx)
 	return;
 }
 
+#ifdef NTFS_RW
+
 /**
  * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
  * @vol:	ntfs volume to which the attribute belongs
@@ -1243,6 +1245,13 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
  *	-ENOSPC	- Not enough disk space.
  *	-EINVAL	- Attribute not defined on the volume.
  *	-EIO	- I/o error or other error.
+ * Note that -ENOSPC is also returned in the case that there is not enough
+ * space in the mft record to do the conversion.  This can happen when the mft
+ * record is already very full.  The caller is responsible for trying to make
+ * space in the mft record and trying again.  FIXME: Do we need a separate
+ * error return code for this kind of -ENOSPC or is it always worth trying
+ * again in case the attribute may then fit in a resident state so no need to
+ * make it non-resident at all?  Ho-hum...  (AIA)
  *
  * NOTE to self: No changes in the attribute list are required to move from
  *		 a resident to a non-resident attribute.
@@ -1520,13 +1529,13 @@ err_out:
 rl_err_out:
 	if (rl) {
 		if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
-			ntfs_free(rl);
 			ntfs_error(vol->sb, "Failed to release allocated "
 					"cluster(s) in error code path.  Run "
 					"chkdsk to recover the lost "
 					"cluster(s).");
 			NVolSetErrors(vol);
 		}
+		ntfs_free(rl);
 page_err_out:
 		unlock_page(page);
 		page_cache_release(page);
@@ -1680,3 +1689,5 @@ done:
 	ntfs_debug("Done.");
 	return 0;
 }
+
+#endif /* NTFS_RW */
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index d73385198336..0e4ac6d3c0e7 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -89,6 +89,8 @@ extern ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni,
 		MFT_RECORD *mrec);
 extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx);
 
+#ifdef NTFS_RW
+
 extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol,
 		const ATTR_TYPE type, const s64 size);
 extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol,
@@ -103,4 +105,6 @@ extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
 extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
 		const u8 val);
 
+#endif /* NTFS_RW */
+
 #endif /* _LINUX_NTFS_ATTRIB_H */
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 71bf08730b05..7087b5b0e6ce 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -848,7 +848,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
 
 	total_freed = real_freed = 0;
 
-	/* This returns with ni->runlist locked for reading on success. */
 	down_read(&ni->runlist.lock);
 	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
 	if (IS_ERR(rl)) {
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 3f479f176610..396d767c2cab 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -980,6 +980,39 @@ LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
 	return LCN_ENOENT;
 }
 
+#ifdef NTFS_RW
+
+/**
+ * ntfs_rl_find_vcn_nolock - find a vcn in a runlist
+ * @rl:		runlist to search
+ * @vcn:	vcn to find
+ *
+ * Find the virtual cluster number @vcn in the runlist @rl and return the
+ * address of the runlist element containing the @vcn on success.
+ *
+ * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of
+ * the runlist.
+ *
+ * Locking: The runlist must be locked on entry.
+ */
+runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, const VCN vcn)
+{
+	BUG_ON(vcn < 0);
+	if (unlikely(!rl || vcn < rl[0].vcn))
+		return NULL;
+	while (likely(rl->length)) {
+		if (unlikely(vcn < rl[1].vcn)) {
+			if (likely(rl->lcn >= LCN_HOLE))
+				return rl;
+			return NULL;
+		}
+		rl++;
+	}
+	if (likely(rl->lcn == LCN_ENOENT))
+		return rl;
+	return NULL;
+}
+
 /**
  * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number
  * @n:		number for which to get the number of bytes for
@@ -1452,3 +1485,5 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
 	ntfs_debug("Done.");
 	return 0;
 }
+
+#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index 60c42f3a3fc4..cf5c1b44bea8 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -2,7 +2,7 @@
  * runlist.h - Defines for runlist handling in NTFS Linux kernel driver.
  *	       Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -78,6 +78,11 @@ extern runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
 
 extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn);
 
+#ifdef NTFS_RW
+
+extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl,
+		const VCN vcn);
+
 extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
 		const runlist_element *rl, const VCN start_vcn);
 
@@ -88,4 +93,6 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
 		runlist *const runlist, const s64 new_length);
 
+#endif /* NTFS_RW */
+
 #endif /* _LINUX_NTFS_RUNLIST_H */

From 251c8427c9c418674fc3c04a11de95dc3661b560 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Mon, 4 Apr 2005 14:59:56 +0100
Subject: [PATCH 031/113] NTFS: Remove checks for NULL before calling kfree()
 since kfree() does the       checking itself.  (Jesper Juhl)

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  2 ++
 fs/ntfs/dir.c     | 12 ++++--------
 fs/ntfs/namei.c   |  3 +--
 fs/ntfs/super.c   |  3 +--
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index b29e0618f358..51428bfce952 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -103,6 +103,8 @@ ToDo/Notes:
 	- Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and
 	  handle the case where an attribute is converted from resident to
 	  non-resident by a concurrent file write.
+	- Remove checks for NULL before calling kfree() since kfree() does the
+	  checking itself.  (Jesper Juhl)
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 95c44577160c..a56ca1821eed 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -183,8 +183,7 @@ found_it:
 				name->len = 0;
 				*res = name;
 			} else {
-				if (name)
-					kfree(name);
+				kfree(name);
 				*res = NULL;
 			}
 			mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -444,8 +443,7 @@ found_it2:
 				name->len = 0;
 				*res = name;
 			} else {
-				if (name)
-					kfree(name);
+				kfree(name);
 				*res = NULL;
 			}
 			mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -1462,10 +1460,8 @@ err_out:
 		unlock_page(ia_page);
 		ntfs_unmap_page(ia_page);
 	}
-	if (ir)
-		kfree(ir);
-	if (name)
-		kfree(name);
+	kfree(ir);
+	kfree(name);
 	if (ctx)
 		ntfs_attr_put_search_ctx(ctx);
 	if (m)
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index d261e25bd9ba..351dbc3b6e40 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -153,8 +153,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 			ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with "
 					"error code %li.", dent_ino,
 					PTR_ERR(dent_inode));
-		if (name)
-			kfree(name);
+		kfree(name);
 		/* Return the error code. */
 		return (struct dentry *)dent_inode;
 	}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 78416eb2a168..25fa1d1668da 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1193,8 +1193,7 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
 		return FALSE;
 	}
 	/* We do not care for the type of match that was found. */
-	if (name)
-		kfree(name);
+	kfree(name);
 	/* Get the inode. */
 	tmp_ino = ntfs_iget(vol->sb, MREF(mref));
 	if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {

From b0d2374d62faed034dd80e6524efb98a6341597c Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Mon, 4 Apr 2005 16:20:14 +0100
Subject: [PATCH 032/113] NTFS: Some utilities modify the boot sector but do
 not update the checksum.       Thus, relax the checking in
 fs/ntfs/super.c::is_boot_sector_ntfs() to       only emit a warning when the
 checksum is incorrect rather than       refusing the mount.  Thanks to Bernd
 Casimir for pointing this       problem out.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  5 +++++
 fs/ntfs/super.c   | 12 ++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 51428bfce952..8ff6ea778fbd 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -105,6 +105,11 @@ ToDo/Notes:
 	  non-resident by a concurrent file write.
 	- Remove checks for NULL before calling kfree() since kfree() does the
 	  checking itself.  (Jesper Juhl)
+	- Some utilities modify the boot sector but do not update the checksum.
+	  Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to
+	  only emit a warning when the checksum is incorrect rather than
+	  refusing the mount.  Thanks to Bernd Casimir for pointing this
+	  problem out.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 25fa1d1668da..6f752ea765c3 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -38,6 +38,7 @@
 #include "debug.h"
 #include "index.h"
 #include "aops.h"
+#include "layout.h"
 #include "malloc.h"
 #include "ntfs.h"
 
@@ -532,16 +533,19 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
 {
 	/*
 	 * Check that checksum == sum of u32 values from b to the checksum
-	 * field. If checksum is zero, no checking is done.
+	 * field.  If checksum is zero, no checking is done.  We will work when
+	 * the checksum test fails, since some utilities update the boot sector
+	 * ignoring the checksum which leaves the checksum out-of-date.  We
+	 * report a warning if this is the case.
 	 */
-	if ((void*)b < (void*)&b->checksum && b->checksum) {
+	if ((void*)b < (void*)&b->checksum && b->checksum && !silent) {
 		le32 *u;
 		u32 i;
 
 		for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
 			i += le32_to_cpup(u);
 		if (le32_to_cpu(b->checksum) != i)
-			goto not_ntfs;
+			ntfs_warning(sb, "Invalid boot sector checksum.");
 	}
 	/* Check OEMidentifier is "NTFS    " */
 	if (b->oem_id != magicNTFS)
@@ -591,7 +595,7 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
 	 * many BIOSes will refuse to boot from a bootsector if the magic is
 	 * incorrect, so we emit a warning.
 	 */
-	if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
+	if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
 		ntfs_warning(sb, "Invalid end of sector marker.");
 	return TRUE;
 not_ntfs:

From bb3cf33509009132cf8c7a7729f9d26c0c5fa961 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 6 Apr 2005 13:34:31 +0100
Subject: [PATCH 033/113] NTFS: Update attribute definition handling.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  1 +
 fs/ntfs/attrib.c  | 21 +++++++--------------
 fs/ntfs/layout.h  | 42 ++++++++++++++++++++++++++++++------------
 fs/ntfs/ntfs.h    |  3 ++-
 4 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 8ff6ea778fbd..52f868a9662b 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -110,6 +110,7 @@ ToDo/Notes:
 	  only emit a warning when the checksum is incorrect rather than
 	  refusing the mount.  Thanks to Bernd Casimir for pointing this
 	  problem out.
+	- Update attribute definition handling.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1b95f39234a3..23ca3bdfb89a 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1138,28 +1138,21 @@ int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type,
  * Check whether the attribute of @type on the ntfs volume @vol is allowed to
  * be non-resident.  This information is obtained from $AttrDef system file.
  *
- * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, or
+ * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and
  * -ENOENT if the attribute is not listed in $AttrDef.
  */
 int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
 {
 	ATTR_DEF *ad;
 
-	/*
-	 * $DATA and $EA are always allowed to be non-resident even if $AttrDef
-	 * does not specify this in the flags of the $DATA attribute definition
-	 * record.
-	 */
-	if (type == AT_DATA || type == AT_EA)
-		return 0;
 	/* Find the attribute definition record in $AttrDef. */
 	ad = ntfs_attr_find_in_attrdef(vol, type);
 	if (unlikely(!ad))
 		return -ENOENT;
 	/* Check the flags and return the result. */
-	if (ad->flags & CAN_BE_NON_RESIDENT)
-		return 0;
-	return -EPERM;
+	if (ad->flags & ATTR_DEF_RESIDENT)
+		return -EPERM;
+	return 0;
 }
 
 /**
@@ -1182,9 +1175,9 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
  */
 int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
 {
-	if (type != AT_INDEX_ALLOCATION && type != AT_EA)
-		return 0;
-	return -EPERM;
+	if (type == AT_INDEX_ALLOCATION || type == AT_EA)
+		return -EPERM;
+	return 0;
 }
 
 /**
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 8d1f1326612f..458cb541d4dd 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -547,26 +547,44 @@ enum {
 	COLLATION_NTOFS_ULONG		= const_cpu_to_le32(0x10),
 	COLLATION_NTOFS_SID		= const_cpu_to_le32(0x11),
 	COLLATION_NTOFS_SECURITY_HASH	= const_cpu_to_le32(0x12),
-	COLLATION_NTOFS_ULONGS		= const_cpu_to_le32(0x13)
+	COLLATION_NTOFS_ULONGS		= const_cpu_to_le32(0x13),
 };
 
 typedef le32 COLLATION_RULE;
 
 /*
  * The flags (32-bit) describing attribute properties in the attribute
- * definition structure.  FIXME: This information is from Regis's information
- * and, according to him, it is not certain and probably incomplete.
- * The INDEXABLE flag is fairly certainly correct as only the file name
- * attribute has this flag set and this is the only attribute indexed in NT4.
+ * definition structure.  FIXME: This information is based on Regis's
+ * information and, according to him, it is not certain and probably
+ * incomplete.  The INDEXABLE flag is fairly certainly correct as only the file
+ * name attribute has this flag set and this is the only attribute indexed in
+ * NT4.
  */
 enum {
-	INDEXABLE	    = const_cpu_to_le32(0x02), /* Attribute can be
-							  indexed. */
-	NEED_TO_REGENERATE  = const_cpu_to_le32(0x40), /* Need to regenerate
-							  during regeneration
-							  phase. */
-	CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be
-							  non-resident. */
+	ATTR_DEF_INDEXABLE	= const_cpu_to_le32(0x02), /* Attribute can be
+					indexed. */
+	ATTR_DEF_MULTIPLE	= const_cpu_to_le32(0x04), /* Attribute type
+					can be present multiple times in the
+					mft records of an inode. */
+	ATTR_DEF_NOT_ZERO	= const_cpu_to_le32(0x08), /* Attribute value
+					must contain at least one non-zero
+					byte. */
+	ATTR_DEF_INDEXED_UNIQUE	= const_cpu_to_le32(0x10), /* Attribute must be
+					indexed and the attribute value must be
+					unique for the attribute type in all of
+					the mft records of an inode. */
+	ATTR_DEF_NAMED_UNIQUE	= const_cpu_to_le32(0x20), /* Attribute must be
+					named and the name must be unique for
+					the attribute type in all of the mft
+					records of an inode. */
+	ATTR_DEF_RESIDENT	= const_cpu_to_le32(0x40), /* Attribute must be
+					resident. */
+	ATTR_DEF_ALWAYS_LOG	= const_cpu_to_le32(0x80), /* Always log
+					modifications to this attribute,
+					regardless of whether it is resident or
+					non-resident.  Without this, only log
+					modifications if the attribute is
+					resident. */
 };
 
 typedef le32 ATTR_DEF_FLAGS;
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index a0bf706797bb..e85f6e928b1e 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -2,7 +2,7 @@
  * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS
  *	    project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  * Copyright (C) 2002 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
@@ -41,6 +41,7 @@ typedef enum {
 	NTFS_BLOCK_SIZE_BITS	= 9,
 	NTFS_SB_MAGIC		= 0x5346544e,	/* 'NTFS' */
 	NTFS_MAX_NAME_LEN	= 255,
+	NTFS_MAX_ATTR_NAME_LEN	= 255,
 } NTFS_CONSTANTS;
 
 /* Global variables. */

From 7fafb8b634121f4fa35ff92f85737f8bc2259f06 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 6 Apr 2005 16:09:21 +0100
Subject: [PATCH 034/113] NTFS: Minor cleanup: Define and use
 NTFS_MAX_CLUSTER_SIZE constant instead       of hard coded 0x10000 in
 fs/ntfs/super.c.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 2 ++
 fs/ntfs/ntfs.h    | 3 +++
 fs/ntfs/super.c   | 6 +++---
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 52f868a9662b..788e3bd2528b 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -111,6 +111,8 @@ ToDo/Notes:
 	  refusing the mount.  Thanks to Bernd Casimir for pointing this
 	  problem out.
 	- Update attribute definition handling.
+	- Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants.
+	- Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index e85f6e928b1e..446b5014115c 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -31,6 +31,7 @@
 #include <linux/fs.h>
 #include <linux/nls.h>
 #include <linux/smp.h>
+#include <linux/pagemap.h>
 
 #include "types.h"
 #include "volume.h"
@@ -42,6 +43,8 @@ typedef enum {
 	NTFS_SB_MAGIC		= 0x5346544e,	/* 'NTFS' */
 	NTFS_MAX_NAME_LEN	= 255,
 	NTFS_MAX_ATTR_NAME_LEN	= 255,
+	NTFS_MAX_CLUSTER_SIZE	= 64 * 1024,	/* 64kiB */
+	NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE,
 } NTFS_CONSTANTS;
 
 /* Global variables. */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6f752ea765c3..8e50aa929f1c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -561,9 +561,9 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
 	default:
 		goto not_ntfs;
 	}
-	/* Check the cluster size is not above 65536 bytes. */
+	/* Check the cluster size is not above the maximum (64kiB). */
 	if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
-			b->bpb.sectors_per_cluster > 0x10000)
+			b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
 		goto not_ntfs;
 	/* Check reserved/unused fields are really zero. */
 	if (le16_to_cpu(b->bpb.reserved_sectors) ||
@@ -2585,7 +2585,7 @@ err_out_now:
  */
 kmem_cache_t *ntfs_name_cache;
 
-/* Slab caches for efficient allocation/deallocation of of inodes. */
+/* Slab caches for efficient allocation/deallocation of inodes. */
 kmem_cache_t *ntfs_inode_cache;
 kmem_cache_t *ntfs_big_inode_cache;
 

From d53ee3222459f347cb18985a845864bc81a44eaf Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Wed, 6 Apr 2005 16:11:20 +0100
Subject: [PATCH 035/113] NTFS: Use MAX_BUF_PER_PAGE instead of variable sized
 array allocation for       better code generation and one less sparse warning
 in fs/ntfs/aops.c.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  2 ++
 fs/ntfs/aops.c    | 14 ++++++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 788e3bd2528b..7f7eec58fee2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -113,6 +113,8 @@ ToDo/Notes:
 	- Update attribute definition handling.
 	- Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants.
 	- Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
+	- Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
+	  better code generation and one less sparse warning in fs/ntfs/aops.c.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 6241c4cfbe28..24c46c200337 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -825,17 +825,15 @@ static int ntfs_write_mst_block(struct page *page,
 	ntfs_inode *ni = NTFS_I(vi);
 	ntfs_volume *vol = ni->vol;
 	u8 *kaddr;
-	unsigned char bh_size_bits = vi->i_blkbits;
-	unsigned int bh_size = 1 << bh_size_bits;
 	unsigned int rec_size = ni->itype.index.block_size;
 	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
 	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
-	int max_bhs = PAGE_CACHE_SIZE / bh_size;
-	struct buffer_head *bhs[max_bhs];
+	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
 	runlist_element *rl;
-	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
-	unsigned rec_size_bits;
+	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
+	unsigned bh_size, rec_size_bits;
 	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
+	unsigned char bh_size_bits;
 
 	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
 			"0x%lx.", vi->i_ino, ni->type, page->index);
@@ -850,7 +848,11 @@ static int ntfs_write_mst_block(struct page *page,
 	 */
 	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
 			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
+	bh_size_bits = vi->i_blkbits;
+	bh_size = 1 << bh_size_bits;
+	max_bhs = PAGE_CACHE_SIZE / bh_size;
 	BUG_ON(!max_bhs);
+	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
 
 	/* Were we called for sync purposes? */
 	sync = (wbc->sync_mode == WB_SYNC_ALL);

From 2fb21db2548fc8b196eb8d8425f05ee1965d5344 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 25 May 2005 21:15:34 +0300
Subject: [PATCH 036/113] NTFS: Remove spurious void pointer casts from
 fs/ntfs/.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 1 +
 fs/ntfs/inode.c   | 5 ++---
 fs/ntfs/unistr.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 7f7eec58fee2..f8ba90dd8e39 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -115,6 +115,7 @@ ToDo/Notes:
 	- Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
 	- Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
 	  better code generation and one less sparse warning in fs/ntfs/aops.c.
+	- Remove spurious void pointer casts from fs/ntfs/.  (Pekka Enberg)
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index f7bee8d014d9..886214a77f90 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -317,8 +317,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache,
-			SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return VFS_I(ni);
@@ -343,7 +342,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return ni;
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 560b0ea255b0..19c42e231b44 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -264,7 +264,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 
 	/* We don't trust outside sources. */
 	if (ins) {
-		ucs = (ntfschar*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
+		ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
 		if (ucs) {
 			for (i = o = 0; i < ins_len; i += wc_len) {
 				wc_len = nls->char2uni(ins + i, ins_len - i,

From 442d207eb0b4e7047c4fedccd900c425e689d502 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Fri, 27 May 2005 16:42:56 +0100
Subject: [PATCH 037/113] NTFS: Use C99 style structure initialization after
 memory allocation where       possible (fs/ntfs/{attrib.c,index.c,super.c}). 
 Thanks to Al Viro and       Pekka Enberg.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  3 +++
 fs/ntfs/attrib.c  | 17 ++++++++---------
 fs/ntfs/index.c   | 16 +++-------------
 fs/ntfs/super.c   | 39 +++++++++++----------------------------
 4 files changed, 25 insertions(+), 50 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index f8ba90dd8e39..cb86140aa2a3 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -116,6 +116,9 @@ ToDo/Notes:
 	- Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
 	  better code generation and one less sparse warning in fs/ntfs/aops.c.
 	- Remove spurious void pointer casts from fs/ntfs/.  (Pekka Enberg)
+	- Use C99 style structure initialization after memory allocation where
+	  possible (fs/ntfs/{attrib.c,index.c,super.c}).  Thanks to Al Viro and
+	  Pekka Enberg.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 23ca3bdfb89a..104eedfb2507 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -982,15 +982,14 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
 static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
 		ntfs_inode *ni, MFT_RECORD *mrec)
 {
-	ctx->mrec = mrec;
-	/* Sanity checks are performed elsewhere. */
-	ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
-	ctx->is_first = TRUE;
-	ctx->ntfs_ino = ni;
-	ctx->al_entry = NULL;
-	ctx->base_ntfs_ino = NULL;
-	ctx->base_mrec = NULL;
-	ctx->base_attr = NULL;
+	*ctx = (ntfs_attr_search_ctx) {
+		.mrec = mrec,
+		/* Sanity checks are performed elsewhere. */
+		.attr = (ATTR_RECORD*)((u8*)mrec +
+				le16_to_cpu(mrec->attrs_offset)),
+		.is_first = TRUE,
+		.ntfs_ino = ni,
+	};
 }
 
 /**
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 71bd2cd7a4d9..11fd5307d780 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -1,7 +1,7 @@
 /*
  * index.c - NTFS kernel index handling.  Part of the Linux-NTFS project.
  *
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -39,18 +39,8 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 	ntfs_index_context *ictx;
 
 	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
-	if (ictx) {
-		ictx->idx_ni = idx_ni;
-		ictx->entry = NULL;
-		ictx->data = NULL;
-		ictx->data_len = 0;
-		ictx->is_in_root = 0;
-		ictx->ir = NULL;
-		ictx->actx = NULL;
-		ictx->base_ni = NULL;
-		ictx->ia = NULL;
-		ictx->page = NULL;
-	}
+	if (ictx)
+		*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
 	return ictx;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 8e50aa929f1c..455cbe0a6296 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2292,36 +2292,19 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		return -ENOMEM;
 	}
 	/* Initialize ntfs_volume structure. */
-	memset(vol, 0, sizeof(ntfs_volume));
-	vol->sb = sb;
-	vol->upcase = NULL;
-	vol->attrdef = NULL;
-	vol->mft_ino = NULL;
-	vol->mftbmp_ino = NULL;
+	*vol = (ntfs_volume) {
+		.sb = sb,
+		/*
+		 * Default is group and other don't have any access to files or
+		 * directories while owner has full access. Further, files by
+		 * default are not executable but directories are of course
+		 * browseable.
+		 */
+		.fmask = 0177,
+		.dmask = 0077,
+	};
 	init_rwsem(&vol->mftbmp_lock);
-#ifdef NTFS_RW
-	vol->mftmirr_ino = NULL;
-	vol->logfile_ino = NULL;
-#endif /* NTFS_RW */
-	vol->lcnbmp_ino = NULL;
 	init_rwsem(&vol->lcnbmp_lock);
-	vol->vol_ino = NULL;
-	vol->root_ino = NULL;
-	vol->secure_ino = NULL;
-	vol->extend_ino = NULL;
-#ifdef NTFS_RW
-	vol->quota_ino = NULL;
-	vol->quota_q_ino = NULL;
-#endif /* NTFS_RW */
-	vol->nls_map = NULL;
-
-	/*
-	 * Default is group and other don't have any access to files or
-	 * directories while owner has full access. Further, files by default
-	 * are not executable but directories are of course browseable.
-	 */
-	vol->fmask = 0177;
-	vol->dmask = 0077;
 
 	unlock_kernel();
 

From f5f287738bddf38ec9ca79344f00dab675e1bbf5 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonny@burdell.org>
Date: Thu, 23 Jun 2005 16:57:56 -0500
Subject: [PATCH 038/113] JFS: performance patch

Basically, we saw a large amount of time spent in the
jfs_strfromUCS_le() function, mispredicting the branch inside the
loop, so I just added some unlikely modifiers to the if statements to
re-ordered the code.  Again, these simple changes provided > 2 % on
spec-sfs, so please consider it for inclusion.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_unicode.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c
index b32208aad550..f327decfb155 100644
--- a/fs/jfs/jfs_unicode.c
+++ b/fs/jfs/jfs_unicode.c
@@ -51,8 +51,9 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
 		}
 	} else {
 		for (i = 0; (i < len) && from[i]; i++) {
-			if (le16_to_cpu(from[i]) & 0xff00) {
-				if (warn) {
+			if (unlikely(le16_to_cpu(from[i]) & 0xff00)) {
+				to[i] = '?';
+				if (unlikely(warn)) {
 					warn--;
 					warn_again--;
 					printk(KERN_ERR
@@ -61,7 +62,7 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
 					printk(KERN_ERR
 				"mount with iocharset=utf8 to access\n");
 				}
-				to[i] = '?';
+
 			}
 			else
 				to[i] = (char) (le16_to_cpu(from[i]));

From 3f2faef00c6af17542ea8672ed7d09367222b2d0 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 15:28:56 +0100
Subject: [PATCH 039/113] NTFS: Stamp the transaction log ($UsnJrnl), aka user
 space journal, if it       is active on the volume and we are mounting
 read-write or remounting       from read-only to read-write.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   4 +-
 fs/ntfs/Makefile  |   2 +-
 fs/ntfs/layout.h  |  22 ++--
 fs/ntfs/super.c   | 254 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/ntfs/types.h   |  10 +-
 fs/ntfs/usnjrnl.c |  84 +++++++++++++++
 fs/ntfs/usnjrnl.h | 205 +++++++++++++++++++++++++++++++++++++
 fs/ntfs/volume.h  |   6 ++
 8 files changed, 566 insertions(+), 21 deletions(-)
 create mode 100644 fs/ntfs/usnjrnl.c
 create mode 100644 fs/ntfs/usnjrnl.h

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index cb86140aa2a3..08c8c04b0216 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,6 +1,5 @@
 ToDo/Notes:
 	- Find and fix bugs.
-	- Checkpoint or disable the user space journal ($UsnJrnl).
 	- In between ntfs_prepare/commit_write, need exclusion between
 	  simultaneous file extensions.  This is given to us by holding i_sem
 	  on the inode.  The only places in the kernel when a file is resized
@@ -119,6 +118,9 @@ ToDo/Notes:
 	- Use C99 style structure initialization after memory allocation where
 	  possible (fs/ntfs/{attrib.c,index.c,super.c}).  Thanks to Al Viro and
 	  Pekka Enberg.
+	- Stamp the transaction log ($UsnJrnl), aka user space journal, if it
+	  is active on the volume and we are mounting read-write or remounting
+	  from read-only to read-write.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f8c97d41226b..59f9606a82a1 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -15,5 +15,5 @@ endif
 ifeq ($(CONFIG_NTFS_RW),y)
 EXTRA_CFLAGS += -DNTFS_RW
 
-ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o
+ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
 endif
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 458cb541d4dd..03c3e8612e7c 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -936,20 +936,12 @@ typedef struct {
 		/* 56*/	le64 quota_charged;	/* Byte size of the charge to
 				the quota for all streams of the file. Note: Is
 				zero if quotas are disabled. */
-		/* 64*/	le64 usn;		/* Last update sequence number
-				of the file. This is a direct index into the
-				change (aka usn) journal file. It is zero if
-				the usn journal is disabled.
-				NOTE: To disable the journal need to delete
-				the journal file itself and to then walk the
-				whole mft and set all Usn entries in all mft
-				records to zero! (This can take a while!)
-				The journal is FILE_Extend/$UsnJrnl. Win2k
-				will recreate the journal and initiate
-				logging if necessary when mounting the
-				partition. This, in contrast to disabling the
-				journal is a very fast process, so the user
-				won't even notice it. */
+		/* 64*/	USN usn;		/* Last update sequence number
+				of the file.  This is a direct index into the
+				transaction log file ($UsnJrnl).  It is zero if
+				the usn journal is disabled or this file has
+				not been subject to logging yet.  See usnjrnl.h
+				for details. */
 		} __attribute__ ((__packed__)) v3;
 	/* sizeof() = 72 bytes (NTFS 3.x) */
 	} __attribute__ ((__packed__)) ver;
@@ -1912,7 +1904,7 @@ enum {
 	VOLUME_FLAGS_MASK		= const_cpu_to_le16(0x803f),
 
 	/* To make our life easier when checking if we must mount read-only. */
-	VOLUME_MUST_MOUNT_RO_MASK	= const_cpu_to_le16(0x8037),
+	VOLUME_MUST_MOUNT_RO_MASK	= const_cpu_to_le16(0x8027),
 } __attribute__ ((__packed__));
 
 typedef le16 VOLUME_FLAGS;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 455cbe0a6296..92e1d28219b3 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -34,6 +34,7 @@
 #include "sysctl.h"
 #include "logfile.h"
 #include "quota.h"
+#include "usnjrnl.h"
 #include "dir.h"
 #include "debug.h"
 #include "index.h"
@@ -497,6 +498,12 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 			NVolSetErrors(vol);
 			return -EROFS;
 		}
+		if (!ntfs_stamp_usnjrnl(vol)) {
+			ntfs_error(sb, "Failed to stamp transation log "
+					"($UsnJrnl)%s", es);
+			NVolSetErrors(vol);
+			return -EROFS;
+		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
 		/* Remounting read-only. */
 		if (!NVolErrors(vol)) {
@@ -1218,6 +1225,167 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
 	return TRUE;
 }
 
+/**
+ * load_and_init_usnjrnl - load and setup the transaction log if present
+ * @vol:	ntfs super block describing device whose usnjrnl file to load
+ *
+ * Return TRUE on success or FALSE on error.
+ *
+ * If $UsnJrnl is not present or in the process of being disabled, we set
+ * NVolUsnJrnlStamped() and return success.
+ *
+ * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
+ * i.e. transaction logging has only just been enabled or the journal has been
+ * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
+ * and return success.
+ */
+static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
+{
+	MFT_REF mref;
+	struct inode *tmp_ino;
+	ntfs_inode *tmp_ni;
+	struct page *page;
+	ntfs_name *name = NULL;
+	USN_HEADER *uh;
+	static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
+			const_cpu_to_le16('U'), const_cpu_to_le16('s'),
+			const_cpu_to_le16('n'), const_cpu_to_le16('J'),
+			const_cpu_to_le16('r'), const_cpu_to_le16('n'),
+			const_cpu_to_le16('l'), 0 };
+	static ntfschar Max[5] = { const_cpu_to_le16('$'),
+			const_cpu_to_le16('M'), const_cpu_to_le16('a'),
+			const_cpu_to_le16('x'), 0 };
+	static ntfschar J[3] = { const_cpu_to_le16('$'),
+			const_cpu_to_le16('J'), 0 };
+
+	ntfs_debug("Entering.");
+	/*
+	 * Find the inode number for the transaction log file by looking up the
+	 * filename $UsnJrnl in the extended system files directory $Extend.
+	 */
+	down(&vol->extend_ino->i_sem);
+	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
+			&name);
+	up(&vol->extend_ino->i_sem);
+	if (IS_ERR_MREF(mref)) {
+		/*
+		 * If the file does not exist, transaction logging is disabled,
+		 * just return success.
+		 */
+		if (MREF_ERR(mref) == -ENOENT) {
+			ntfs_debug("$UsnJrnl not present.  Volume does not "
+					"have transaction logging enabled.");
+not_enabled:
+			/*
+			 * No need to try to stamp the transaction log if
+			 * transaction logging is not enabled.
+			 */
+			NVolSetUsnJrnlStamped(vol);
+			return TRUE;
+		}
+		/* A real error occured. */
+		ntfs_error(vol->sb, "Failed to find inode number for "
+				"$UsnJrnl.");
+		return FALSE;
+	}
+	/* We do not care for the type of match that was found. */
+	kfree(name);
+	/* Get the inode. */
+	tmp_ino = ntfs_iget(vol->sb, MREF(mref));
+	if (unlikely(IS_ERR(tmp_ino) || is_bad_inode(tmp_ino))) {
+		if (!IS_ERR(tmp_ino))
+			iput(tmp_ino);
+		ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
+		return FALSE;
+	}
+	vol->usnjrnl_ino = tmp_ino;
+	/*
+	 * If the transaction log is in the process of being deleted, we can
+	 * ignore it.
+	 */
+	if (unlikely(vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY)) {
+		ntfs_debug("$UsnJrnl in the process of being disabled.  "
+				"Volume does not have transaction logging "
+				"enabled.");
+		goto not_enabled;
+	}
+	/* Get the $DATA/$Max attribute. */
+	tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, Max, 4);
+	if (IS_ERR(tmp_ino)) {
+		ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max "
+				"attribute.");
+		return FALSE;
+	}
+	vol->usnjrnl_max_ino = tmp_ino;
+	if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
+		ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
+				"attribute (size is 0x%llx but should be at "
+				"least 0x%x bytes).", i_size_read(tmp_ino),
+				sizeof(USN_HEADER));
+		return FALSE;
+	}
+	/* Get the $DATA/$J attribute. */
+	tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2);
+	if (IS_ERR(tmp_ino)) {
+		ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J "
+				"attribute.");
+		return FALSE;
+	}
+	vol->usnjrnl_j_ino = tmp_ino;
+	/* Verify $J is non-resident and sparse. */
+	tmp_ni = NTFS_I(vol->usnjrnl_j_ino);
+	if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) {
+		ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident "
+				"and/or not sparse.");
+		return FALSE;
+	}
+	/* Read the USN_HEADER from $DATA/$Max. */
+	page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
+	if (IS_ERR(page)) {
+		ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max "
+				"attribute.");
+		return FALSE;
+	}
+	uh = (USN_HEADER*)page_address(page);
+	/* Sanity check the $Max. */
+	if (unlikely(sle64_to_cpu(uh->allocation_delta) >
+			sle64_to_cpu(uh->maximum_size))) {
+		ntfs_error(vol->sb, "Allocation delta (0x%llx) exceeds "
+				"maximum size (0x%llx).  $UsnJrnl is corrupt.",
+				(long long)sle64_to_cpu(uh->allocation_delta),
+				(long long)sle64_to_cpu(uh->maximum_size));
+		ntfs_unmap_page(page);
+		return FALSE;
+	}
+	/*
+	 * If the transaction log has been stamped and nothing has been written
+	 * to it since, we do not need to stamp it.
+	 */
+	if (unlikely(sle64_to_cpu(uh->lowest_valid_usn) >=
+			i_size_read(vol->usnjrnl_j_ino))) {
+		if (likely(sle64_to_cpu(uh->lowest_valid_usn) ==
+				i_size_read(vol->usnjrnl_j_ino))) {
+			ntfs_unmap_page(page);
+			ntfs_debug("$UsnJrnl is enabled but nothing has been "
+					"logged since it was last stamped.  "
+					"Treating this as if the volume does "
+					"not have transaction logging "
+					"enabled.");
+			goto not_enabled;
+		}
+		ntfs_error(vol->sb, "$UsnJrnl has lowest valid usn (0x%llx) "
+				"which is out of bounds (0x%llx).  $UsnJrnl "
+				"is corrupt.",
+				(long long)sle64_to_cpu(uh->lowest_valid_usn),
+				i_size_read(vol->usnjrnl_j_ino));
+		ntfs_unmap_page(page);
+		return FALSE;
+	}
+	ntfs_unmap_page(page);
+	ntfs_debug("Done.");
+	return TRUE;
+}
+
 /**
  * load_and_init_attrdef - load the attribute definitions table for a volume
  * @vol:	ntfs super block describing device whose attrdef to load
@@ -1653,7 +1821,7 @@ get_ctx_vol_failed:
 		goto iput_logfile_err_out;
 	}
 	/* If on NTFS versions before 3.0, we are done. */
-	if (vol->major_ver < 3)
+	if (unlikely(vol->major_ver < 3))
 		return TRUE;
 	/* NTFS 3.0+ specific initialization. */
 	/* Get the security descriptors inode. */
@@ -1664,7 +1832,7 @@ get_ctx_vol_failed:
 		ntfs_error(sb, "Failed to load $Secure.");
 		goto iput_root_err_out;
 	}
-	// FIXME: Initialize security.
+	// TODO: Initialize security.
 	/* Get the extended system files' directory inode. */
 	vol->extend_ino = ntfs_iget(sb, FILE_Extend);
 	if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
@@ -1715,10 +1883,60 @@ get_ctx_vol_failed:
 		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
 		NVolSetErrors(vol);
 	}
-	// TODO: Delete or checkpoint the $UsnJrnl if it exists.
+	/*
+	 * Find the transaction log file ($UsnJrnl), load it if present, check
+	 * it, and set it up.
+	 */
+	if (!load_and_init_usnjrnl(vol)) {
+		static const char *es1 = "Failed to load $UsnJrnl";
+		static const char *es2 = ".  Run chkdsk.";
+
+		/* If a read-write mount, convert it to a read-only mount. */
+		if (!(sb->s_flags & MS_RDONLY)) {
+			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
+					ON_ERRORS_CONTINUE))) {
+				ntfs_error(sb, "%s and neither on_errors="
+						"continue nor on_errors="
+						"remount-ro was specified%s",
+						es1, es2);
+				goto iput_usnjrnl_err_out;
+			}
+			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
+		} else
+			ntfs_warning(sb, "%s.  Will not be able to remount "
+					"read-write%s", es1, es2);
+		/* This will prevent a read-write remount. */
+		NVolSetErrors(vol);
+	}
+	/* If (still) a read-write mount, stamp the transaction log. */
+	if (!(sb->s_flags & MS_RDONLY) && !ntfs_stamp_usnjrnl(vol)) {
+		static const char *es1 = "Failed to stamp transaction log "
+				"($UsnJrnl)";
+		static const char *es2 = ".  Run chkdsk.";
+
+		/* Convert to a read-only mount. */
+		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
+				ON_ERRORS_CONTINUE))) {
+			ntfs_error(sb, "%s and neither on_errors=continue nor "
+					"on_errors=remount-ro was specified%s",
+					es1, es2);
+			goto iput_usnjrnl_err_out;
+		}
+		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
+		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+		NVolSetErrors(vol);
+	}
 #endif /* NTFS_RW */
 	return TRUE;
 #ifdef NTFS_RW
+iput_usnjrnl_err_out:
+	if (vol->usnjrnl_j_ino)
+		iput(vol->usnjrnl_j_ino);
+	if (vol->usnjrnl_max_ino)
+		iput(vol->usnjrnl_max_ino);
+	if (vol->usnjrnl_ino)
+		iput(vol->usnjrnl_ino);
 iput_quota_err_out:
 	if (vol->quota_q_ino)
 		iput(vol->quota_q_ino);
@@ -1792,6 +2010,12 @@ static void ntfs_put_super(struct super_block *sb)
 
 	/* NTFS 3.0+ specific. */
 	if (vol->major_ver >= 3) {
+		if (vol->usnjrnl_j_ino)
+			ntfs_commit_inode(vol->usnjrnl_j_ino);
+		if (vol->usnjrnl_max_ino)
+			ntfs_commit_inode(vol->usnjrnl_max_ino);
+		if (vol->usnjrnl_ino)
+			ntfs_commit_inode(vol->usnjrnl_ino);
 		if (vol->quota_q_ino)
 			ntfs_commit_inode(vol->quota_q_ino);
 		if (vol->quota_ino)
@@ -1847,6 +2071,18 @@ static void ntfs_put_super(struct super_block *sb)
 	/* NTFS 3.0+ specific clean up. */
 	if (vol->major_ver >= 3) {
 #ifdef NTFS_RW
+		if (vol->usnjrnl_j_ino) {
+			iput(vol->usnjrnl_j_ino);
+			vol->usnjrnl_j_ino = NULL;
+		}
+		if (vol->usnjrnl_max_ino) {
+			iput(vol->usnjrnl_max_ino);
+			vol->usnjrnl_max_ino = NULL;
+		}
+		if (vol->usnjrnl_ino) {
+			iput(vol->usnjrnl_ino);
+			vol->usnjrnl_ino = NULL;
+		}
 		if (vol->quota_q_ino) {
 			iput(vol->quota_q_ino);
 			vol->quota_q_ino = NULL;
@@ -2463,6 +2699,18 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	/* NTFS 3.0+ specific clean up. */
 	if (vol->major_ver >= 3) {
 #ifdef NTFS_RW
+		if (vol->usnjrnl_j_ino) {
+			iput(vol->usnjrnl_j_ino);
+			vol->usnjrnl_j_ino = NULL;
+		}
+		if (vol->usnjrnl_max_ino) {
+			iput(vol->usnjrnl_max_ino);
+			vol->usnjrnl_max_ino = NULL;
+		}
+		if (vol->usnjrnl_ino) {
+			iput(vol->usnjrnl_ino);
+			vol->usnjrnl_ino = NULL;
+		}
 		if (vol->quota_q_ino) {
 			iput(vol->quota_q_ino);
 			vol->quota_q_ino = NULL;
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h
index 08a55aa53d4e..6e4a7e3343f2 100644
--- a/fs/ntfs/types.h
+++ b/fs/ntfs/types.h
@@ -2,7 +2,7 @@
  * types.h - Defines for NTFS Linux kernel driver specific types.
  *	     Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -53,6 +53,14 @@ typedef sle64 leLCN;
 typedef s64 LSN;
 typedef sle64 leLSN;
 
+/*
+ * The NTFS transaction log $UsnJrnl uses usn which are signed 64-bit values.
+ * We define our own type USN, to allow for type checking and better code
+ * readability.
+ */
+typedef s64 USN;
+typedef sle64 leUSN;
+
 typedef enum {
 	FALSE = 0,
 	TRUE = 1
diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c
new file mode 100644
index 000000000000..77773240d139
--- /dev/null
+++ b/fs/ntfs/usnjrnl.c
@@ -0,0 +1,84 @@
+/*
+ * usnjrnl.h - NTFS kernel transaction log ($UsnJrnl) handling.  Part of the
+ *	       Linux-NTFS project.
+ *
+ * Copyright (c) 2005 Anton Altaparmakov
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifdef NTFS_RW
+
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+
+#include "aops.h"
+#include "debug.h"
+#include "endian.h"
+#include "time.h"
+#include "types.h"
+#include "usnjrnl.h"
+#include "volume.h"
+
+/**
+ * ntfs_stamp_usnjrnl - stamp the transaction log ($UsnJrnl) on an ntfs volume
+ * @vol:	ntfs volume on which to stamp the transaction log
+ *
+ * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return
+ * TRUE on success and FALSE on error.
+ *
+ * This function assumes that the transaction log has already been loaded and
+ * consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl().
+ */
+BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
+{
+	ntfs_debug("Entering.");
+	if (likely(!NVolUsnJrnlStamped(vol))) {
+		sle64 stamp;
+		struct page *page;
+		USN_HEADER *uh;
+
+		page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
+		if (IS_ERR(page)) {
+			ntfs_error(vol->sb, "Failed to read from "
+					"$UsnJrnl/$DATA/$Max attribute.");
+			return FALSE;
+		}
+		uh = (USN_HEADER*)page_address(page);
+		stamp = get_current_ntfs_time();
+		ntfs_debug("Stamping transaction log ($UsnJrnl): old "
+				"journal_id 0x%llx, old lowest_valid_usn "
+				"0x%llx, new journal_id 0x%llx, new "
+				"lowest_valid_usn 0x%llx.",
+				(long long)sle64_to_cpu(uh->journal_id),
+				(long long)sle64_to_cpu(uh->lowest_valid_usn),
+				(long long)sle64_to_cpu(stamp),
+				i_size_read(vol->usnjrnl_j_ino));
+		uh->lowest_valid_usn =
+				cpu_to_sle64(i_size_read(vol->usnjrnl_j_ino));
+		uh->journal_id = stamp;
+		flush_dcache_page(page);
+		set_page_dirty(page);
+		ntfs_unmap_page(page);
+		/* Set the flag so we do not have to do it again on remount. */
+		NVolSetUsnJrnlStamped(vol);
+	}
+	ntfs_debug("Done.");
+	return TRUE;
+}
+
+#endif /* NTFS_RW */
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
new file mode 100644
index 000000000000..ff988b0deb45
--- /dev/null
+++ b/fs/ntfs/usnjrnl.h
@@ -0,0 +1,205 @@
+/*
+ * usnjrnl.h - Defines for NTFS kernel transaction log ($UsnJrnl) handling.
+ *	       Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2005 Anton Altaparmakov
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LINUX_NTFS_USNJRNL_H
+#define _LINUX_NTFS_USNJRNL_H
+
+#ifdef NTFS_RW
+
+#include "types.h"
+#include "endian.h"
+#include "layout.h"
+#include "volume.h"
+
+/*
+ * Transaction log ($UsnJrnl) organization:
+ *
+ * The transaction log records whenever a file is modified in any way.  So for
+ * example it will record that file "blah" was written to at a particular time
+ * but not what was written.  If will record that a file was deleted or
+ * created, that a file was truncated, etc.  See below for all the reason
+ * codes used.
+ *
+ * The transaction log is in the $Extend directory which is in the root
+ * directory of each volume.  If it is not present it means transaction
+ * logging is disabled.  If it is present it means transaction logging is
+ * either enabled or in the process of being disabled in which case we can
+ * ignore it as it will go away as soon as Windows gets its hands on it.
+ *
+ * To determine whether the transaction logging is enabled or in the process
+ * of being disabled, need to check the volume flags in the
+ * $VOLUME_INFORMATION attribute in the $Volume system file (which is present
+ * in the root directory and has a fixed mft record number, see layout.h).
+ * If the flag VOLUME_DELETE_USN_UNDERWAY is set it means the transaction log
+ * is in the process of being disabled and if this flag is clear it means the
+ * transaction log is enabled.
+ *
+ * The transaction log consists of two parts; the $DATA/$Max attribute as well
+ * as the $DATA/$J attribute.  $Max is a header describing the transaction
+ * log whilst $J is the transaction log data itself as a sequence of variable
+ * sized USN_RECORDs (see below for all the structures).
+ *
+ * We do not care about transaction logging at this point in time but we still
+ * need to let windows know that the transaction log is out of date.  To do
+ * this we need to stamp the transaction log.  This involves setting the
+ * lowest_valid_usn field in the $DATA/$Max attribute to the usn to be used
+ * for the next added USN_RECORD to the $DATA/$J attribute as well as
+ * generating a new journal_id in $DATA/$Max.
+ *
+ * The journal_id is as of the current version (2.0) of the transaction log
+ * simply the 64-bit timestamp of when the journal was either created or last
+ * stamped.
+ *
+ * To determine the next usn there are two ways.  The first is to parse
+ * $DATA/$J and to find the last USN_RECORD in it and to add its record_length
+ * to its usn (which is the byte offset in the $DATA/$J attribute).  The
+ * second is simply to take the data size of the attribute.  Since the usns
+ * are simply byte offsets into $DATA/$J, this is exactly the next usn.  For
+ * obvious reasons we use the second method as it is much simpler and faster.
+ *
+ * As an aside, note that to actually disable the transaction log, one would
+ * need to set the VOLUME_DELETE_USN_UNDERWAY flag (see above), then go
+ * through all the mft records on the volume and set the usn field in their
+ * $STANDARD_INFORMATION attribute to zero.  Once that is done, one would need
+ * to delete the transaction log file, i.e. \$Extent\$UsnJrnl, and finally,
+ * one would need to clear the VOLUME_DELETE_USN_UNDERWAY flag.
+ *
+ * Note that if a volume is unmounted whilst the transaction log is being
+ * disabled, the process will continue the next time the volume is mounted.
+ * This is why we can safely mount read-write when we see a transaction log
+ * in the process of being deleted.
+ */
+
+/* Some $UsnJrnl related constants. */
+#define UsnJrnlMajorVer		2
+#define UsnJrnlMinorVer		0
+
+/*
+ * $DATA/$Max attribute.  This is (always?) resident and has a fixed size of
+ * 32 bytes.  It contains the header describing the transaction log.
+ */
+typedef struct {
+/*Ofs*/
+/*   0*/sle64 maximum_size;	/* The maximum on-disk size of the $DATA/$J
+				   attribute. */
+/*   8*/sle64 allocation_delta;	/* Number of bytes by which to increase the
+				   size of the $DATA/$J attribute. */
+/*0x10*/sle64 journal_id;	/* Current id of the transaction log. */
+/*0x18*/leUSN lowest_valid_usn;	/* Lowest valid usn in $DATA/$J for the
+				   current journal_id. */
+/* sizeof() = 32 (0x20) bytes */
+} __attribute__ ((__packed__)) USN_HEADER;
+
+/*
+ * Reason flags (32-bit).  Cumulative flags describing the change(s) to the
+ * file since it was last opened.  I think the names speak for themselves but
+ * if you disagree check out the descriptions in the Linux NTFS project NTFS
+ * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ */
+enum {
+	USN_REASON_DATA_OVERWRITE	= const_cpu_to_le32(0x00000001),
+	USN_REASON_DATA_EXTEND		= const_cpu_to_le32(0x00000002),
+	USN_REASON_DATA_TRUNCATION	= const_cpu_to_le32(0x00000004),
+	USN_REASON_NAMED_DATA_OVERWRITE	= const_cpu_to_le32(0x00000010),
+	USN_REASON_NAMED_DATA_EXTEND	= const_cpu_to_le32(0x00000020),
+	USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040),
+	USN_REASON_FILE_CREATE		= const_cpu_to_le32(0x00000100),
+	USN_REASON_FILE_DELETE		= const_cpu_to_le32(0x00000200),
+	USN_REASON_EA_CHANGE		= const_cpu_to_le32(0x00000400),
+	USN_REASON_SECURITY_CHANGE	= const_cpu_to_le32(0x00000800),
+	USN_REASON_RENAME_OLD_NAME	= const_cpu_to_le32(0x00001000),
+	USN_REASON_RENAME_NEW_NAME	= const_cpu_to_le32(0x00002000),
+	USN_REASON_INDEXABLE_CHANGE	= const_cpu_to_le32(0x00004000),
+	USN_REASON_BASIC_INFO_CHANGE	= const_cpu_to_le32(0x00008000),
+	USN_REASON_HARD_LINK_CHANGE	= const_cpu_to_le32(0x00010000),
+	USN_REASON_COMPRESSION_CHANGE	= const_cpu_to_le32(0x00020000),
+	USN_REASON_ENCRYPTION_CHANGE	= const_cpu_to_le32(0x00040000),
+	USN_REASON_OBJECT_ID_CHANGE	= const_cpu_to_le32(0x00080000),
+	USN_REASON_REPARSE_POINT_CHANGE	= const_cpu_to_le32(0x00100000),
+	USN_REASON_STREAM_CHANGE	= const_cpu_to_le32(0x00200000),
+	USN_REASON_CLOSE		= const_cpu_to_le32(0x80000000),
+};
+
+typedef le32 USN_REASON_FLAGS;
+
+/*
+ * Source info flags (32-bit).  Information about the source of the change(s)
+ * to the file.  For detailed descriptions of what these mean, see the Linux
+ * NTFS project NTFS documentation:
+ *	http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ */
+enum {
+	USN_SOURCE_DATA_MANAGEMENT	  = const_cpu_to_le32(0x00000001),
+	USN_SOURCE_AUXILIARY_DATA	  = const_cpu_to_le32(0x00000002),
+	USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004),
+};
+
+typedef le32 USN_SOURCE_INFO_FLAGS;
+
+/*
+ * $DATA/$J attribute.  This is always non-resident, is marked as sparse, and
+ * is of variabled size.  It consists of a sequence of variable size
+ * USN_RECORDS.  The minimum allocated_size is allocation_delta as
+ * specified in $DATA/$Max.  When the maximum_size specified in $DATA/$Max is
+ * exceeded by more than allocation_delta bytes, allocation_delta bytes are
+ * allocated and appended to the $DATA/$J attribute and an equal number of
+ * bytes at the beginning of the attribute are freed and made sparse.  Note the
+ * making sparse only happens at volume checkpoints and hence the actual
+ * $DATA/$J size can exceed maximum_size + allocation_delta temporarily.
+ */
+typedef struct {
+/*Ofs*/
+/*   0*/le32 length;		/* Byte size of this record (8-byte
+				   aligned). */
+/*   4*/le16 major_ver;		/* Major version of the transaction log used
+				   for this record. */
+/*   6*/le16 minor_ver;		/* Minor version of the transaction log used
+				   for this record. */
+/*   8*/leMFT_REF mft_reference;/* The mft reference of the file (or
+				   directory) described by this record. */
+/*0x10*/leMFT_REF parent_directory;/* The mft reference of the parent
+				   directory of the file described by this
+				   record. */
+/*0x18*/leUSN usn;		/* The usn of this record.  Equals the offset
+				   within the $DATA/$J attribute. */
+/*0x20*/sle64 time;		/* Time when this record was created. */
+/*0x28*/USN_REASON_FLAGS reason;/* Reason flags (see above). */
+/*0x2c*/USN_SOURCE_INFO_FLAGS source_info;/* Source info flags (see above). */
+/*0x30*/le32 security_id;	/* File security_id copied from
+				   $STANDARD_INFORMATION. */
+/*0x34*/FILE_ATTR_FLAGS file_attributes;	/* File attributes copied from
+				   $STANDARD_INFORMATION or $FILE_NAME (not
+				   sure which). */
+/*0x38*/le16 file_name_size;	/* Size of the file name in bytes. */
+/*0x3a*/le16 file_name_offset;	/* Offset to the file name in bytes from the
+				   start of this record. */
+/*0x3c*/ntfschar file_name[0];	/* Use when creating only.  When reading use
+				   file_name_offset to determine the location
+				   of the name. */
+/* sizeof() = 60 (0x3c) bytes */
+} __attribute__ ((__packed__)) USN_RECORD;
+
+extern BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol);
+
+#endif /* NTFS_RW */
+
+#endif /* _LINUX_NTFS_USNJRNL_H */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 62be73ad0156..375cd20a9f61 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -125,6 +125,10 @@ typedef struct {
 	/* $Quota stuff is NTFS3.0+ specific.  Unused/NULL otherwise. */
 	struct inode *quota_ino;	/* The VFS inode of $Quota. */
 	struct inode *quota_q_ino;	/* Attribute inode for $Quota/$Q. */
+	/* $UsnJrnl stuff is NTFS3.0+ specific.  Unused/NULL otherwise. */
+	struct inode *usnjrnl_ino;	/* The VFS inode of $UsnJrnl. */
+	struct inode *usnjrnl_max_ino;	/* Attribute inode for $UsnJrnl/$Max. */
+	struct inode *usnjrnl_j_ino;	/* Attribute inode for $UsnJrnl/$J. */
 #endif /* NTFS_RW */
 	struct nls_table *nls_map;
 } ntfs_volume;
@@ -141,6 +145,7 @@ typedef enum {
 				      file names in WIN32 namespace. */
 	NV_LogFileEmpty,	/* 1: $LogFile journal is empty. */
 	NV_QuotaOutOfDate,	/* 1: $Quota is out of date. */
+	NV_UsnJrnlStamped,	/* 1: $UsnJrnl has been stamped. */
 	NV_SparseEnabled,	/* 1: May create sparse files. */
 } ntfs_volume_flags;
 
@@ -168,6 +173,7 @@ NVOL_FNS(ShowSystemFiles)
 NVOL_FNS(CaseSensitive)
 NVOL_FNS(LogFileEmpty)
 NVOL_FNS(QuotaOutOfDate)
+NVOL_FNS(UsnJrnlStamped)
 NVOL_FNS(SparseEnabled)
 
 #endif /* _LINUX_NTFS_VOLUME_H */

From 9f993fe4634b39ca4404ba278053b03f360ec08a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 16:15:36 +0100
Subject: [PATCH 040/113] NTFS: Fix a bug in address space operations error
 recovery code paths where       if the runlist was not mapped at all and a
 mapping error occured we       would leave the runlist locked on exit to the
 function so that the       next access to the same file would try to take the
 lock and deadlock.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  4 ++++
 fs/ntfs/aops.c    | 16 ++++++++++++----
 fs/ntfs/layout.h  |  2 +-
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 08c8c04b0216..59ecca4297bb 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -121,6 +121,10 @@ ToDo/Notes:
 	- Stamp the transaction log ($UsnJrnl), aka user space journal, if it
 	  is active on the volume and we are mounting read-write or remounting
 	  from read-only to read-write.
+	- Fix a bug in address space operations error recovery code paths where
+	  if the runlist was not mapped at all and a mapping error occured we
+	  would leave the runlist locked on exit to the function so that the
+	  next access to the same file would try to take the lock and deadlock.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 24c46c200337..3f43bfe6184e 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -264,7 +264,8 @@ lock_retry_remap:
 					goto lock_retry_remap;
 				rl = NULL;
 				lcn = err;
-			}
+			} else if (!rl)
+				up_read(&ni->runlist.lock);
 			/* Hard error, zero out region. */
 			bh->b_blocknr = -1;
 			SetPageError(page);
@@ -690,7 +691,8 @@ lock_retry_remap:
 				goto lock_retry_remap;
 			rl = NULL;
 			lcn = err;
-		}
+		} else if (!rl)
+			up_read(&ni->runlist.lock);
 		/* Failed to map the buffer, even after retrying. */
 		bh->b_blocknr = -1;
 		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
@@ -965,8 +967,11 @@ lock_retry_remap:
 					if (err2 == -ENOMEM)
 						page_is_dirty = TRUE;
 					lcn = err2;
-				} else
+				} else {
 					err2 = -EIO;
+					if (!rl)
+						up_read(&ni->runlist.lock);
+				}
 				/* Hard error.  Abort writing this record. */
 				if (!err || err == -ENOMEM)
 					err = err2;
@@ -1660,6 +1665,8 @@ lock_retry_remap:
 							"not supported yet. "
 							"Sorry.");
 					err = -EOPNOTSUPP;
+					if (!rl)
+						up_read(&ni->runlist.lock);
 					goto err_out;
 				} else if (!is_retry &&
 						lcn == LCN_RL_NOT_MAPPED) {
@@ -1674,7 +1681,8 @@ lock_retry_remap:
 						goto lock_retry_remap;
 					rl = NULL;
 					lcn = err;
-				}
+				} else if (!rl)
+					up_read(&ni->runlist.lock);
 				/*
 				 * Failed to map the buffer, even after
 				 * retrying.
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 03c3e8612e7c..609ad1728ce4 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -936,7 +936,7 @@ typedef struct {
 		/* 56*/	le64 quota_charged;	/* Byte size of the charge to
 				the quota for all streams of the file. Note: Is
 				zero if quotas are disabled. */
-		/* 64*/	USN usn;		/* Last update sequence number
+		/* 64*/	leUSN usn;		/* Last update sequence number
 				of the file.  This is a direct index into the
 				transaction log file ($UsnJrnl).  It is zero if
 				the usn journal is disabled or this file has

From ca8fd7a0c6aa835e8014830b290cb965e85ac88e Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 16:31:27 +0100
Subject: [PATCH 041/113] NTFS: Detect the case when Windows has been suspended
 to disk on the volume       to be mounted and if this is the case do not
 allow (re)mounting       read-write.  This is done by parsing hiberfil.sys if
 present.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   3 +
 fs/ntfs/super.c   | 179 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 171 insertions(+), 11 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 59ecca4297bb..c089bf0c02ac 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -125,6 +125,9 @@ ToDo/Notes:
 	  if the runlist was not mapped at all and a mapping error occured we
 	  would leave the runlist locked on exit to the function so that the
 	  next access to the same file would try to take the lock and deadlock.
+	- Detect the case when Windows has been suspended to disk on the volume
+	  to be mounted and if this is the case do not allow (re)mounting
+	  read-write.  This is done by parsing hiberfil.sys if present.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 92e1d28219b3..41aa8eb6755b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1156,6 +1156,124 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
 	return TRUE;
 }
 
+#define NTFS_HIBERFIL_HEADER_SIZE	4096
+
+/**
+ * check_windows_hibernation_status - check if Windows is suspended on a volume
+ * @vol:	ntfs super block of device to check
+ *
+ * Check if Windows is hibernated on the ntfs volume @vol.  This is done by
+ * looking for the file hiberfil.sys in the root directory of the volume.  If
+ * the file is not present Windows is definitely not suspended.
+ *
+ * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
+ * definitely suspended (this volume is not the system volume).  Caveat:  on a
+ * system with many volumes it is possible that the < 4kiB check is bogus but
+ * for now this should do fine.
+ *
+ * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
+ * hiberfil header (which is the first 4kiB).  If this begins with "hibr",
+ * Windows is definitely suspended.  If it is completely full of zeroes,
+ * Windows is definitely not hibernated.  Any other case is treated as if
+ * Windows is suspended.  This caters for the above mentioned caveat of a
+ * system with many volumes where no "hibr" magic would be present and there is
+ * no zero header.
+ *
+ * Return 0 if Windows is not hibernated on the volume, >0 if Windows is
+ * hibernated on the volume, and -errno on error.
+ */
+static int check_windows_hibernation_status(ntfs_volume *vol)
+{
+	MFT_REF mref;
+	struct inode *vi;
+	ntfs_inode *ni;
+	struct page *page;
+	u32 *kaddr, *kend;
+	ntfs_name *name = NULL;
+	int ret = 1;
+	static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
+			const_cpu_to_le16('i'), const_cpu_to_le16('b'),
+			const_cpu_to_le16('e'), const_cpu_to_le16('r'),
+			const_cpu_to_le16('f'), const_cpu_to_le16('i'),
+			const_cpu_to_le16('l'), const_cpu_to_le16('.'),
+			const_cpu_to_le16('s'), const_cpu_to_le16('y'),
+			const_cpu_to_le16('s'), 0 };
+
+	ntfs_debug("Entering.");
+	/*
+	 * Find the inode number for the hibernation file by looking up the
+	 * filename hiberfil.sys in the root directory.
+	 */
+	down(&vol->root_ino->i_sem);
+	mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
+			&name);
+	up(&vol->root_ino->i_sem);
+	if (IS_ERR_MREF(mref)) {
+		ret = MREF_ERR(mref);
+		/* If the file does not exist, Windows is not hibernated. */
+		if (ret == -ENOENT) {
+			ntfs_debug("hiberfil.sys not present.  Windows is not "
+					"hibernated on the volume.");
+			return 0;
+		}
+		/* A real error occured. */
+		ntfs_error(vol->sb, "Failed to find inode number for "
+				"hiberfil.sys.");
+		return ret;
+	}
+	/* We do not care for the type of match that was found. */
+	kfree(name);
+	/* Get the inode. */
+	vi = ntfs_iget(vol->sb, MREF(mref));
+	if (IS_ERR(vi) || is_bad_inode(vi)) {
+		if (!IS_ERR(vi))
+			iput(vi);
+		ntfs_error(vol->sb, "Failed to load hiberfil.sys.");
+		return IS_ERR(vi) ? PTR_ERR(vi) : -EIO;
+	}
+	if (unlikely(i_size_read(vi) < NTFS_HIBERFIL_HEADER_SIZE)) {
+		ntfs_debug("hiberfil.sys is smaller than 4kiB (0x%llx).  "
+				"Windows is hibernated on the volume.  This "
+				"is not the system volume.", i_size_read(vi));
+		goto iput_out;
+	}
+	ni = NTFS_I(vi);
+	page = ntfs_map_page(vi->i_mapping, 0);
+	if (IS_ERR(page)) {
+		ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
+		ret = PTR_ERR(page);
+		goto iput_out;
+	}
+	kaddr = (u32*)page_address(page);
+	if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
+		ntfs_debug("Magic \"hibr\" found in hiberfil.sys.  Windows is "
+				"hibernated on the volume.  This is the "
+				"system volume.");
+		goto unm_iput_out;
+	}
+	kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
+	do {
+		if (unlikely(*kaddr)) {
+			ntfs_debug("hiberfil.sys is larger than 4kiB "
+					"(0x%llx), does not contain the "
+					"\"hibr\" magic, and does not have a "
+					"zero header.  Windows is hibernated "
+					"on the volume.  This is not the "
+					"system volume.", i_size_read(vi));
+			goto unm_iput_out;
+		}
+	} while (++kaddr < kend);
+	ntfs_debug("hiberfil.sys contains a zero header.  Windows is not "
+			"hibernated on the volume.  This is the system "
+			"volume.");
+	ret = 0;
+unm_iput_out:
+	ntfs_unmap_page(page);
+iput_out:
+	iput(vi);
+	return ret;
+}
+
 /**
  * load_and_init_quota - load and setup the quota file for a volume if present
  * @vol:	ntfs super block describing device whose quota file to load
@@ -1570,6 +1688,9 @@ static BOOL load_system_files(ntfs_volume *vol)
 	MFT_RECORD *m;
 	VOLUME_INFORMATION *vi;
 	ntfs_attr_search_ctx *ctx;
+#ifdef NTFS_RW
+	int err;
+#endif /* NTFS_RW */
 
 	ntfs_debug("Entering.");
 #ifdef NTFS_RW
@@ -1746,6 +1867,50 @@ get_ctx_vol_failed:
 		/* This will prevent a read-write remount. */
 		NVolSetErrors(vol);
 	}
+#endif /* NTFS_RW */
+	/* Get the root directory inode so we can do path lookups. */
+	vol->root_ino = ntfs_iget(sb, FILE_root);
+	if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
+		if (!IS_ERR(vol->root_ino))
+			iput(vol->root_ino);
+		ntfs_error(sb, "Failed to load root directory.");
+		goto iput_logfile_err_out;
+	}
+#ifdef NTFS_RW
+	/*
+	 * Check if Windows is suspended to disk on the target volume.  If it
+	 * is hibernated, we must not write *anything* to the disk so set
+	 * NVolErrors() without setting the dirty volume flag and mount
+	 * read-only.  This will prevent read-write remounting and it will also
+	 * prevent all writes.
+	 */
+	err = check_windows_hibernation_status(vol);
+	if (unlikely(err)) {
+		static const char *es1a = "Failed to determine if Windows is "
+				"hibernated";
+		static const char *es1b = "Windows is hibernated";
+		static const char *es2 = ".  Run chkdsk.";
+		const char *es1;
+
+		es1 = err < 0 ? es1a : es1b;
+		/* If a read-write mount, convert it to a read-only mount. */
+		if (!(sb->s_flags & MS_RDONLY)) {
+			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
+					ON_ERRORS_CONTINUE))) {
+				ntfs_error(sb, "%s and neither on_errors="
+						"continue nor on_errors="
+						"remount-ro was specified%s",
+						es1, es2);
+				goto iput_root_err_out;
+			}
+			sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+			ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
+		} else
+			ntfs_warning(sb, "%s.  Will not be able to remount "
+					"read-write%s", es1, es2);
+		/* This will prevent a read-write remount. */
+		NVolSetErrors(vol);
+	}
 	/* If (still) a read-write mount, mark the volume dirty. */
 	if (!(sb->s_flags & MS_RDONLY) &&
 			ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
@@ -1759,7 +1924,7 @@ get_ctx_vol_failed:
 			ntfs_error(sb, "%s and neither on_errors=continue nor "
 					"on_errors=remount-ro was specified%s",
 					es1, es2);
-			goto iput_logfile_err_out;
+			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1786,7 +1951,7 @@ get_ctx_vol_failed:
 			ntfs_error(sb, "%s and neither on_errors=continue nor "
 					"on_errors=remount-ro was specified%s",
 					es1, es2);
-			goto iput_logfile_err_out;
+			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1805,21 +1970,13 @@ get_ctx_vol_failed:
 			ntfs_error(sb, "%s and neither on_errors=continue nor "
 					"on_errors=remount-ro was specified%s",
 					es1, es2);
-			goto iput_logfile_err_out;
+			goto iput_root_err_out;
 		}
 		ntfs_error(sb, "%s.  Mounting read-only%s", es1, es2);
 		sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
 		NVolSetErrors(vol);
 	}
 #endif /* NTFS_RW */
-	/* Get the root directory inode. */
-	vol->root_ino = ntfs_iget(sb, FILE_root);
-	if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
-		if (!IS_ERR(vol->root_ino))
-			iput(vol->root_ino);
-		ntfs_error(sb, "Failed to load root directory.");
-		goto iput_logfile_err_out;
-	}
 	/* If on NTFS versions before 3.0, we are done. */
 	if (unlikely(vol->major_ver < 3))
 		return TRUE;

From 3bd1f4a173a3445f9919c21e775de2d8b9deacf8 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 16:51:58 +0100
Subject: [PATCH 042/113] NTFS: Fix several occurences of a bug where we would
 perform 'var & ~const'       with a 64-bit variable and a int, i.e. 32-bit,
 constant.  This causes       the higher order 32-bits of the 64-bit variable
 to be zeroed.  To fix       this cast the 'const' to the same 64-bit type as
 'var'.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog  | 4 ++++
 fs/ntfs/dir.c      | 3 ++-
 fs/ntfs/inode.h    | 2 +-
 fs/ntfs/lcnalloc.c | 4 ++--
 fs/ntfs/logfile.c  | 6 +++---
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index c089bf0c02ac..a916c8b0697a 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -128,6 +128,10 @@ ToDo/Notes:
 	- Detect the case when Windows has been suspended to disk on the volume
 	  to be mounted and if this is the case do not allow (re)mounting
 	  read-write.  This is done by parsing hiberfil.sys if present.
+	- Fix several occurences of a bug where we would perform 'var & ~const'
+	  with a 64-bit variable and a int, i.e. 32-bit, constant.  This causes
+	  the higher order 32-bits of the 64-bit variable to be zeroed.  To fix
+	  this cast the 'const' to the same 64-bit type as 'var'.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index a56ca1821eed..46779471c542 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1308,7 +1308,8 @@ find_next_index_buffer:
 	ntfs_debug("Handling index buffer 0x%llx.",
 			(unsigned long long)bmp_pos + cur_bmp_pos);
 	/* If the current index buffer is in the same page we reuse the page. */
-	if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) {
+	if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
+			(ia_pos & (s64)PAGE_CACHE_MASK)) {
 		prev_ia_pos = ia_pos;
 		if (likely(ia_page != NULL)) {
 			unlock_page(ia_page);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 6eb99777a722..3de5c0231966 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -110,7 +110,7 @@ struct _ntfs_inode {
 			u8 block_size_bits; 	/* Log2 of the above. */
 			u8 vcn_size_bits;	/* Log2 of the above. */
 		} index;
-		struct { /* It is a compressed file or an attribute inode. */
+		struct { /* It is a compressed/sparse file/attribute inode. */
 			s64 size;		/* Copy of compressed_size from
 						   $DATA. */
 			u32 block_size;		/* Size of a compression block
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 7087b5b0e6ce..a4bc07616e5d 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -293,7 +293,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 			buf_size = i_size - last_read_pos;
 		buf_size <<= 3;
 		lcn = bmp_pos & 7;
-		bmp_pos &= ~7;
+		bmp_pos &= ~(LCN)7;
 		ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
 				"bmp_pos 0x%llx, need_writeback %i.", buf_size,
 				(unsigned long long)lcn,
@@ -311,7 +311,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
 					(unsigned int)*byte);
 			/* Skip full bytes. */
 			if (*byte == 0xff) {
-				lcn = (lcn + 8) & ~7;
+				lcn = (lcn + 8) & ~(LCN)7;
 				ntfs_debug("Continuing while loop 1.");
 				continue;
 			}
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index e680dd0cdb64..8edb8e20fb08 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -1,7 +1,7 @@
 /*
  * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2002-2004 Anton Altaparmakov
+ * Copyright (c) 2002-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -410,7 +410,7 @@ err_out:
 }
 
 /**
- * ntfs_ckeck_logfile - check in the journal if the volume is consistent
+ * ntfs_check_logfile - check the journal for consistency
  * @log_vi:	struct inode of loaded journal $LogFile to check
  *
  * Check the $LogFile journal for consistency and return TRUE if it is
@@ -464,7 +464,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
 	 * optimize log_page_size and log_page_bits into constants.
 	 */
 	log_page_bits = generic_ffs(log_page_size) - 1;
-	size &= ~(log_page_size - 1);
+	size &= ~(s64)(log_page_size - 1);
 	/*
 	 * Ensure the log file is big enough to store at least the two restart
 	 * pages and the minimum number of log record pages.

From 1d58b27b8d77ecb816cfa8f846b78c845675eb89 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 17:04:55 +0100
Subject: [PATCH 043/113] NTFS: Change the runlist terminator of the newly
 allocated cluster(s) to       LCN_ENOENT in ntfs_attr_make_non_resident(). 
 Otherwise the runlist       code gets confused.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog | 3 +++
 fs/ntfs/attrib.c  | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index a916c8b0697a..aff749db314c 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -132,6 +132,9 @@ ToDo/Notes:
 	  with a 64-bit variable and a int, i.e. 32-bit, constant.  This causes
 	  the higher order 32-bits of the 64-bit variable to be zeroed.  To fix
 	  this cast the 'const' to the same 64-bit type as 'var'.
+	- Change the runlist terminator of the newly allocated cluster(s) to
+	  LCN_ENOENT in ntfs_attr_make_non_resident().  Otherwise the runlist
+	  code gets confused.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 104eedfb2507..34ea405b883d 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1285,6 +1285,8 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 	new_size = (i_size_read(vi) + vol->cluster_size - 1) &
 			~(vol->cluster_size - 1);
 	if (new_size > 0) {
+		runlist_element *rl2;
+
 		/*
 		 * Will need the page later and since the page lock nests
 		 * outside all ntfs locks, we need to get the page now.
@@ -1304,6 +1306,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 					err);
 			goto page_err_out;
 		}
+		/* Change the runlist terminator to LCN_ENOENT. */
+		rl2 = rl;
+		while (rl2->length)
+			rl2++;
+		BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
+		rl2->lcn = LCN_ENOENT;
 	} else {
 		rl = NULL;
 		page = NULL;

From fa3be92317c4ae34edcf5274e8bbeff181e20b7a Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 17:15:36 +0100
Subject: [PATCH 044/113] NTFS: Add an extra parameter @last_vcn to
 ntfs_get_size_for_mapping_pairs()       and ntfs_mapping_pairs_build() to
 allow the runlist encoding to be       partial which is desirable when
 filling holes in sparse attributes.       Update all callers.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |   4 ++
 fs/ntfs/attrib.c  |   4 +-
 fs/ntfs/mft.c     |  12 ++--
 fs/ntfs/runlist.c | 167 ++++++++++++++++++++++++++++++++--------------
 fs/ntfs/runlist.h |   5 +-
 5 files changed, 132 insertions(+), 60 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index aff749db314c..67994c9c248f 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -135,6 +135,10 @@ ToDo/Notes:
 	- Change the runlist terminator of the newly allocated cluster(s) to
 	  LCN_ENOENT in ntfs_attr_make_non_resident().  Otherwise the runlist
 	  code gets confused.
+	- Add an extra parameter @last_vcn to ntfs_get_size_for_mapping_pairs()
+	  and ntfs_mapping_pairs_build() to allow the runlist encoding to be
+	  partial which is desirable when filling holes in sparse attributes.
+	  Update all callers.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 34ea405b883d..c6b2bb64d651 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1317,7 +1317,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		page = NULL;
 	}
 	/* Determine the size of the mapping pairs array. */
-	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0);
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0, -1);
 	if (unlikely(mp_size < 0)) {
 		err = mp_size;
 		ntfs_debug("Failed to get size for mapping pairs array, error "
@@ -1416,7 +1416,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 			cpu_to_sle64(attr_size);
 	/* Generate the mapping pairs array into the attribute record. */
 	err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
-			arec_size - mp_ofs, rl, 0, NULL);
+			arec_size - mp_ofs, rl, 0, -1, NULL);
 	if (unlikely(err)) {
 		ntfs_debug("Failed to build mapping pairs, error code %i.",
 				err);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 61ce09f1b652..3d0ba8e60adc 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1407,7 +1407,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	BUG_ON(ll < rl2->vcn);
 	BUG_ON(ll >= rl2->vcn + rl2->length);
 	/* Get the size for the new mapping pairs array for this extent. */
-	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll);
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
 	if (unlikely(mp_size <= 0)) {
 		ntfs_error(vol->sb, "Get size for mapping pairs failed for "
 				"mft bitmap attribute extent.");
@@ -1441,7 +1441,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 	/* Generate the mapping pairs array directly into the attr record. */
 	ret = ntfs_mapping_pairs_build(vol, (u8*)a +
 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
-			mp_size, rl2, ll, NULL);
+			mp_size, rl2, ll, -1, NULL);
 	if (unlikely(ret)) {
 		ntfs_error(vol->sb, "Failed to build mapping pairs array for "
 				"mft bitmap attribute.");
@@ -1529,7 +1529,7 @@ undo_alloc:
 				a->data.non_resident.mapping_pairs_offset),
 				old_alen - le16_to_cpu(
 				a->data.non_resident.mapping_pairs_offset),
-				rl2, ll, NULL)) {
+				rl2, ll, -1, NULL)) {
 			ntfs_error(vol->sb, "Failed to restore mapping pairs "
 					"array.%s", es);
 			NVolSetErrors(vol);
@@ -1838,7 +1838,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	BUG_ON(ll < rl2->vcn);
 	BUG_ON(ll >= rl2->vcn + rl2->length);
 	/* Get the size for the new mapping pairs array for this extent. */
-	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll);
+	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
 	if (unlikely(mp_size <= 0)) {
 		ntfs_error(vol->sb, "Get size for mapping pairs failed for "
 				"mft data attribute extent.");
@@ -1877,7 +1877,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 	/* Generate the mapping pairs array directly into the attr record. */
 	ret = ntfs_mapping_pairs_build(vol, (u8*)a +
 			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
-			mp_size, rl2, ll, NULL);
+			mp_size, rl2, ll, -1, NULL);
 	if (unlikely(ret)) {
 		ntfs_error(vol->sb, "Failed to build mapping pairs array of "
 				"mft data attribute.");
@@ -1959,7 +1959,7 @@ undo_alloc:
 				a->data.non_resident.mapping_pairs_offset),
 				old_alen - le16_to_cpu(
 				a->data.non_resident.mapping_pairs_offset),
-				rl2, ll, NULL)) {
+				rl2, ll, -1, NULL)) {
 			ntfs_error(vol->sb, "Failed to restore mapping pairs "
 					"array.%s", es);
 			NVolSetErrors(vol);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 396d767c2cab..758855b0414e 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1048,10 +1048,17 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
  * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array
  * @vol:	ntfs volume (needed for the ntfs version)
  * @rl:		locked runlist to determine the size of the mapping pairs of
- * @start_vcn:	vcn at which to start the mapping pairs array
+ * @first_vcn:	first vcn which to include in the mapping pairs array
+ * @last_vcn:	last vcn which to include in the mapping pairs array
  *
  * Walk the locked runlist @rl and calculate the size in bytes of the mapping
- * pairs array corresponding to the runlist @rl, starting at vcn @start_vcn.
+ * pairs array corresponding to the runlist @rl, starting at vcn @first_vcn and
+ * finishing with vcn @last_vcn.
+ *
+ * A @last_vcn of -1 means end of runlist and in that case the size of the
+ * mapping pairs array corresponding to the runlist starting at vcn @first_vcn
+ * and finishing at the end of the runlist is determined.
+ *
  * This for example allows us to allocate a buffer of the right size when
  * building the mapping pairs array.
  *
@@ -1067,34 +1074,50 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
  *	    remains locked throughout, and is left locked upon return.
  */
 int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
-		const runlist_element *rl, const VCN start_vcn)
+		const runlist_element *rl, const VCN first_vcn,
+		const VCN last_vcn)
 {
 	LCN prev_lcn;
 	int rls;
+	BOOL the_end = FALSE;
 
-	BUG_ON(start_vcn < 0);
+	BUG_ON(first_vcn < 0);
+	BUG_ON(last_vcn < -1);
+	BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
 	if (!rl) {
-		BUG_ON(start_vcn);
+		BUG_ON(first_vcn);
+		BUG_ON(last_vcn > 0);
 		return 1;
 	}
-	/* Skip to runlist element containing @start_vcn. */
-	while (rl->length && start_vcn >= rl[1].vcn)
+	/* Skip to runlist element containing @first_vcn. */
+	while (rl->length && first_vcn >= rl[1].vcn)
 		rl++;
-	if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn)
+	if (unlikely((!rl->length && first_vcn > rl->vcn) ||
+			first_vcn < rl->vcn))
 		return -EINVAL;
 	prev_lcn = 0;
 	/* Always need the termining zero byte. */
 	rls = 1;
 	/* Do the first partial run if present. */
-	if (start_vcn > rl->vcn) {
-		s64 delta;
+	if (first_vcn > rl->vcn) {
+		s64 delta, length = rl->length;
 
 		/* We know rl->length != 0 already. */
-		if (rl->length < 0 || rl->lcn < LCN_HOLE)
+		if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
 			goto err_out;
-		delta = start_vcn - rl->vcn;
+		/*
+		 * If @stop_vcn is given and finishes inside this run, cap the
+		 * run length.
+		 */
+		if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+			s64 s1 = last_vcn + 1;
+			if (unlikely(rl[1].vcn > s1))
+				length = s1 - rl->vcn;
+			the_end = TRUE;
+		}
+		delta = first_vcn - rl->vcn;
 		/* Header byte + length. */
-		rls += 1 + ntfs_get_nr_significant_bytes(rl->length - delta);
+		rls += 1 + ntfs_get_nr_significant_bytes(length - delta);
 		/*
 		 * If the logical cluster number (lcn) denotes a hole and we
 		 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1102,9 +1125,9 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
 		 * Note: this assumes that on NTFS 1.2-, holes are stored with
 		 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
 		 */
-		if (rl->lcn >= 0 || vol->major_ver < 3) {
+		if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
 			prev_lcn = rl->lcn;
-			if (rl->lcn >= 0)
+			if (likely(rl->lcn >= 0))
 				prev_lcn += delta;
 			/* Change in lcn. */
 			rls += ntfs_get_nr_significant_bytes(prev_lcn);
@@ -1113,11 +1136,23 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
 		rl++;
 	}
 	/* Do the full runs. */
-	for (; rl->length; rl++) {
-		if (rl->length < 0 || rl->lcn < LCN_HOLE)
+	for (; rl->length && !the_end; rl++) {
+		s64 length = rl->length;
+
+		if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
 			goto err_out;
+		/*
+		 * If @stop_vcn is given and finishes inside this run, cap the
+		 * run length.
+		 */
+		if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+			s64 s1 = last_vcn + 1;
+			if (unlikely(rl[1].vcn > s1))
+				length = s1 - rl->vcn;
+			the_end = TRUE;
+		}
 		/* Header byte + length. */
-		rls += 1 + ntfs_get_nr_significant_bytes(rl->length);
+		rls += 1 + ntfs_get_nr_significant_bytes(length);
 		/*
 		 * If the logical cluster number (lcn) denotes a hole and we
 		 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1125,7 +1160,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
 		 * Note: this assumes that on NTFS 1.2-, holes are stored with
 		 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
 		 */
-		if (rl->lcn >= 0 || vol->major_ver < 3) {
+		if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
 			/* Change in lcn. */
 			rls += ntfs_get_nr_significant_bytes(rl->lcn -
 					prev_lcn);
@@ -1168,7 +1203,7 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
 
 	i = 0;
 	do {
-		if (dst > dst_max)
+		if (unlikely(dst > dst_max))
 			goto err_out;
 		*dst++ = l & 0xffll;
 		l >>= 8;
@@ -1177,12 +1212,12 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
 	j = (n >> 8 * (i - 1)) & 0xff;
 	/* If the sign bit is wrong, we need an extra byte. */
 	if (n < 0 && j >= 0) {
-		if (dst > dst_max)
+		if (unlikely(dst > dst_max))
 			goto err_out;
 		i++;
 		*dst = (s8)-1;
 	} else if (n > 0 && j < 0) {
-		if (dst > dst_max)
+		if (unlikely(dst > dst_max))
 			goto err_out;
 		i++;
 		*dst = (s8)0;
@@ -1198,13 +1233,18 @@ err_out:
  * @dst:	destination buffer to which to write the mapping pairs array
  * @dst_len:	size of destination buffer @dst in bytes
  * @rl:		locked runlist for which to build the mapping pairs array
- * @start_vcn:	vcn at which to start the mapping pairs array
+ * @first_vcn:	first vcn which to include in the mapping pairs array
+ * @last_vcn:	last vcn which to include in the mapping pairs array
  * @stop_vcn:	first vcn outside destination buffer on success or -ENOSPC
  *
  * Create the mapping pairs array from the locked runlist @rl, starting at vcn
- * @start_vcn and save the array in @dst.  @dst_len is the size of @dst in
- * bytes and it should be at least equal to the value obtained by calling
- * ntfs_get_size_for_mapping_pairs().
+ * @first_vcn and finishing with vcn @last_vcn and save the array in @dst.
+ * @dst_len is the size of @dst in bytes and it should be at least equal to the
+ * value obtained by calling ntfs_get_size_for_mapping_pairs().
+ *
+ * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
+ * array corresponding to the runlist starting at vcn @first_vcn and finishing
+ * at the end of the runlist is created.
  *
  * If @rl is NULL, just write a single terminator byte to @dst.
  *
@@ -1213,7 +1253,7 @@ err_out:
  * been filled with all the mapping pairs that will fit, thus it can be treated
  * as partial success, in that a new attribute extent needs to be created or
  * the next extent has to be used and the mapping pairs build has to be
- * continued with @start_vcn set to *@stop_vcn.
+ * continued with @first_vcn set to *@stop_vcn.
  *
  * Return 0 on success and -errno on error.  The following error codes are
  * defined:
@@ -1227,27 +1267,32 @@ err_out:
  */
 int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 		const int dst_len, const runlist_element *rl,
-		const VCN start_vcn, VCN *const stop_vcn)
+		const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn)
 {
 	LCN prev_lcn;
 	s8 *dst_max, *dst_next;
 	int err = -ENOSPC;
+	BOOL the_end = FALSE;
 	s8 len_len, lcn_len;
 
-	BUG_ON(start_vcn < 0);
+	BUG_ON(first_vcn < 0);
+	BUG_ON(last_vcn < -1);
+	BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
 	BUG_ON(dst_len < 1);
 	if (!rl) {
-		BUG_ON(start_vcn);
+		BUG_ON(first_vcn);
+		BUG_ON(last_vcn > 0);
 		if (stop_vcn)
 			*stop_vcn = 0;
 		/* Terminator byte. */
 		*dst = 0;
 		return 0;
 	}
-	/* Skip to runlist element containing @start_vcn. */
-	while (rl->length && start_vcn >= rl[1].vcn)
+	/* Skip to runlist element containing @first_vcn. */
+	while (rl->length && first_vcn >= rl[1].vcn)
 		rl++;
-	if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn)
+	if (unlikely((!rl->length && first_vcn > rl->vcn) ||
+			first_vcn < rl->vcn))
 		return -EINVAL;
 	/*
 	 * @dst_max is used for bounds checking in
@@ -1256,17 +1301,27 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 	dst_max = dst + dst_len - 1;
 	prev_lcn = 0;
 	/* Do the first partial run if present. */
-	if (start_vcn > rl->vcn) {
-		s64 delta;
+	if (first_vcn > rl->vcn) {
+		s64 delta, length = rl->length;
 
 		/* We know rl->length != 0 already. */
-		if (rl->length < 0 || rl->lcn < LCN_HOLE)
+		if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
 			goto err_out;
-		delta = start_vcn - rl->vcn;
+		/*
+		 * If @stop_vcn is given and finishes inside this run, cap the
+		 * run length.
+		 */
+		if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+			s64 s1 = last_vcn + 1;
+			if (unlikely(rl[1].vcn > s1))
+				length = s1 - rl->vcn;
+			the_end = TRUE;
+		}
+		delta = first_vcn - rl->vcn;
 		/* Write length. */
 		len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
-				rl->length - delta);
-		if (len_len < 0)
+				length - delta);
+		if (unlikely(len_len < 0))
 			goto size_err;
 		/*
 		 * If the logical cluster number (lcn) denotes a hole and we
@@ -1277,19 +1332,19 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 		 * case on NT4. - We assume that we just need to write the lcn
 		 * change until someone tells us otherwise... (AIA)
 		 */
-		if (rl->lcn >= 0 || vol->major_ver < 3) {
+		if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
 			prev_lcn = rl->lcn;
-			if (rl->lcn >= 0)
+			if (likely(rl->lcn >= 0))
 				prev_lcn += delta;
 			/* Write change in lcn. */
 			lcn_len = ntfs_write_significant_bytes(dst + 1 +
 					len_len, dst_max, prev_lcn);
-			if (lcn_len < 0)
+			if (unlikely(lcn_len < 0))
 				goto size_err;
 		} else
 			lcn_len = 0;
 		dst_next = dst + len_len + lcn_len + 1;
-		if (dst_next > dst_max)
+		if (unlikely(dst_next > dst_max))
 			goto size_err;
 		/* Update header byte. */
 		*dst = lcn_len << 4 | len_len;
@@ -1299,13 +1354,25 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 		rl++;
 	}
 	/* Do the full runs. */
-	for (; rl->length; rl++) {
-		if (rl->length < 0 || rl->lcn < LCN_HOLE)
+	for (; rl->length && !the_end; rl++) {
+		s64 length = rl->length;
+
+		if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
 			goto err_out;
+		/*
+		 * If @stop_vcn is given and finishes inside this run, cap the
+		 * run length.
+		 */
+		if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+			s64 s1 = last_vcn + 1;
+			if (unlikely(rl[1].vcn > s1))
+				length = s1 - rl->vcn;
+			the_end = TRUE;
+		}
 		/* Write length. */
 		len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
-				rl->length);
-		if (len_len < 0)
+				length);
+		if (unlikely(len_len < 0))
 			goto size_err;
 		/*
 		 * If the logical cluster number (lcn) denotes a hole and we
@@ -1316,17 +1383,17 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 		 * case on NT4. - We assume that we just need to write the lcn
 		 * change until someone tells us otherwise... (AIA)
 		 */
-		if (rl->lcn >= 0 || vol->major_ver < 3) {
+		if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
 			/* Write change in lcn. */
 			lcn_len = ntfs_write_significant_bytes(dst + 1 +
 					len_len, dst_max, rl->lcn - prev_lcn);
-			if (lcn_len < 0)
+			if (unlikely(lcn_len < 0))
 				goto size_err;
 			prev_lcn = rl->lcn;
 		} else
 			lcn_len = 0;
 		dst_next = dst + len_len + lcn_len + 1;
-		if (dst_next > dst_max)
+		if (unlikely(dst_next > dst_max))
 			goto size_err;
 		/* Update header byte. */
 		*dst = lcn_len << 4 | len_len;
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index cf5c1b44bea8..aa0ee6540e7c 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -84,11 +84,12 @@ extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl,
 		const VCN vcn);
 
 extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
-		const runlist_element *rl, const VCN start_vcn);
+		const runlist_element *rl, const VCN first_vcn,
+		const VCN last_vcn);
 
 extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
 		const int dst_len, const runlist_element *rl,
-		const VCN start_vcn, VCN *const stop_vcn);
+		const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn);
 
 extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
 		runlist *const runlist, const s64 new_length);

From 4757d7dff65b56f2115038ad1615725f31806787 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 17:24:08 +0100
Subject: [PATCH 045/113] NTFS: Change ntfs_map_runlist_nolock() to only
 decompress the mapping pairs       if the requested vcn is inside it. 
 Otherwise we get into problems       when we try to map an out of bounds vcn
 because we then try to map       the already mapped runlist fragment which
 causes       ntfs_mapping_pairs_decompress() to fail and return error. 
 Update       ntfs_attr_find_vcn_nolock() accordingly.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 fs/ntfs/ChangeLog |  6 +++++
 fs/ntfs/attrib.c  | 61 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 67994c9c248f..a6d2b943a148 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -139,6 +139,12 @@ ToDo/Notes:
 	  and ntfs_mapping_pairs_build() to allow the runlist encoding to be
 	  partial which is desirable when filling holes in sparse attributes.
 	  Update all callers.
+	- Change ntfs_map_runlist_nolock() to only decompress the mapping pairs
+	  if the requested vcn is inside it.  Otherwise we get into problems
+	  when we try to map an out of bounds vcn because we then try to map
+	  the already mapped runlist fragment which causes
+	  ntfs_mapping_pairs_decompress() to fail and return error.  Update
+	  ntfs_attr_find_vcn_nolock() accordingly.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index c6b2bb64d651..543d47fa5fc9 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -39,15 +39,19 @@
  *
  * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
  *
- * Return 0 on success and -errno on error.
+ * Return 0 on success and -errno on error.  There is one special error code
+ * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
+ * of bounds of the runlist.
  *
  * Locking: - The runlist must be locked for writing.
  *	    - This function modifies the runlist.
  */
 int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 {
+	VCN end_vcn;
 	ntfs_inode *base_ni;
-	MFT_RECORD *mrec;
+	MFT_RECORD *m;
+	ATTR_RECORD *a;
 	ntfs_attr_search_ctx *ctx;
 	runlist_element *rl;
 	int err = 0;
@@ -58,26 +62,43 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
 		base_ni = ni;
 	else
 		base_ni = ni->ext.base_ntfs_ino;
-	mrec = map_mft_record(base_ni);
-	if (IS_ERR(mrec))
-		return PTR_ERR(mrec);
-	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
+	m = map_mft_record(base_ni);
+	if (IS_ERR(m))
+		return PTR_ERR(m);
+	ctx = ntfs_attr_get_search_ctx(base_ni, m);
 	if (unlikely(!ctx)) {
 		err = -ENOMEM;
 		goto err_out;
 	}
 	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
 			CASE_SENSITIVE, vcn, NULL, 0, ctx);
-	if (likely(!err)) {
-		rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr,
-				ni->runlist.rl);
-		if (IS_ERR(rl))
-			err = PTR_ERR(rl);
-		else
-			ni->runlist.rl = rl;
+	if (unlikely(err)) {
+		if (err == -ENOENT)
+			err = -EIO;
+		goto err_out;
 	}
-	ntfs_attr_put_search_ctx(ctx);
+	a = ctx->attr;
+	/*
+	 * Only decompress the mapping pairs if @vcn is inside it.  Otherwise
+	 * we get into problems when we try to map an out of bounds vcn because
+	 * we then try to map the already mapped runlist fragment and
+	 * ntfs_mapping_pairs_decompress() fails.
+	 */
+	end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
+	if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
+		end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
+	if (unlikely(vcn >= end_vcn)) {
+		err = -ENOENT;
+		goto err_out;
+	}
+	rl = ntfs_mapping_pairs_decompress(ni->vol, a, ni->runlist.rl);
+	if (IS_ERR(rl))
+		err = PTR_ERR(rl);
+	else
+		ni->runlist.rl = rl;
 err_out:
+	if (likely(ctx))
+		ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(base_ni);
 	return err;
 }
@@ -89,7 +110,9 @@ err_out:
  *
  * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
  *
- * Return 0 on success and -errno on error.
+ * Return 0 on success and -errno on error.  There is one special error code
+ * which is not an error as such.  This is -ENOENT.  It means that @vcn is out
+ * of bounds of the runlist.
  *
  * Locking: - The runlist must be unlocked on entry and is unlocked on return.
  *	    - This function takes the runlist lock for writing and modifies the
@@ -287,11 +310,11 @@ retry_remap:
 			goto retry_remap;
 		}
 		/*
-		 * -EINVAL and -ENOENT coming from a failed mapping attempt are
-		 * equivalent to i/o errors for us as they should not happen in
-		 * our code paths.
+		 * -EINVAL coming from a failed mapping attempt is equivalent
+		 * to i/o error for us as it should not happen in our code
+		 * paths.
 		 */
-		if (err == -EINVAL || err == -ENOENT)
+		if (err == -EINVAL)
 			err = -EIO;
 	} else if (!err)
 		err = -EIO;

From af859a42d798f047fbfe198ed315a942662c39d2 Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sat, 25 Jun 2005 21:07:27 +0100
Subject: [PATCH 046/113] NTFS: Prepare for 2.1.23 release: Update
 documentation and bump version.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt | 15 +++++++++++++++
 fs/ntfs/ChangeLog                  | 25 ++++++-------------------
 fs/ntfs/Makefile                   |  2 +-
 fs/ntfs/attrib.c                   |  2 +-
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index cb3cb8c06e9d..1415b96ed491 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -439,6 +439,21 @@ ChangeLog
 
 Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 
+2.1.23:
+	- Stamp the user space journal, aka transaction log, aka $UsnJrnl, if
+	  it is present and active thus telling Windows and applications using
+	  the transaction log that changes can have happened on the volume
+	  which are not recorded in $UsnJrnl.
+	- Detect the case when Windows has been hibernated (suspended to disk)
+	  and if this is the case do not allow (re)mounting read-write to
+	  prevent data corruption when you boot back into the suspended
+	  Windows session.
+	- Implement extension of resident files using the normal file write
+	  code paths, i.e. most very small files can be extended to be a little
+	  bit bigger but not by much.
+	- Improve handling of ntfs volumes with errors and strange boot sectors
+	  in particular.
+	- Fix various bugs.
 2.1.22:
 	- Improve handling of ntfs volumes with errors.
 	- Fix various bugs and race conditions.
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index a6d2b943a148..3d2cac4061d6 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,35 +22,22 @@ ToDo/Notes:
 	- Enable the code for setting the NT4 compatibility flag when we start
 	  making NTFS 1.2 specific modifications.
 
-2.1.23-WIP
+2.1.23 - Implement extension of resident files and make writing safe as well as
+	 many bug fixes, cleanups, and enhancements...
 
 	- Add printk rate limiting for ntfs_warning() and ntfs_error() when
 	  compiled without debug.  This avoids a possible denial of service
 	  attack.  Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
 	  out.
 	- Fix compilation warnings on ia64.  (Randy Dunlap)
-	- Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set().
-	- Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile().
-	- Use i_size_read() once and then use the cached value in
-	  fs/ntfs/lcnalloc.c::ntfs_cluster_alloc().
-	- Use i_size_read() in fs/ntfs/file.c::ntfs_file_open().
+	- Use i_size_{read,write}() instead of reading i_size by hand and cache
+	  the value where apropriate.
 	- Add size_lock to the ntfs_inode structure.  This is an rw spinlock
 	  and it locks against access to the inode sizes.  Note, ->size_lock
 	  is also accessed from irq context so you must use the _irqsave and
-	  _irqrestore lock and unlock functions, respectively.
-	- Use i_size_read() in fs/ntfs/compress.c at the start of the read and
-	  use the cached value afterwards.  Cache the initialized_size in the
-	  same way and protect access to the two sizes using the size_lock.
-	- Use i_size_read() in fs/ntfs/dir.c once and then use the cached
-	  value afterwards.
-	- Use i_size_read() in fs/ntfs/super.c once and then use the cached
-	  value afterwards.  Cache the initialized_size in the same way and
-	  protect access to the two sizes using the size_lock.
+	  _irqrestore lock and unlock functions, respectively.  Protect all
+	  accesses to allocated_size, initialized_size, and compressed_size.
 	- Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
-	- Use i_size_read() in fs/ntfs/inode.c once and then use the cached
-	  value afterwards when reading the size of the bitmap inode.
-	- Use i_size_{read,write}() in fs/ntfs/{aops.c,mft.c} and protect
-	  access to the i_size and other size fields using the size_lock.
 	- Implement extension of resident files in the regular file write code
 	  paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()).  At present
 	  this only works until the data attribute becomes too big for the mft
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 59f9606a82a1..f083f27d8b69 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
 	     index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
 	     unistr.o upcase.o
 
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23-WIP\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
 
 ifeq ($(CONFIG_NTFS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 543d47fa5fc9..cd0f9e740b14 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1324,7 +1324,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
 		if (IS_ERR(rl)) {
 			err = PTR_ERR(rl);
 			ntfs_debug("Failed to allocate cluster%s, error code "
-					"%i.\n", (new_size >>
+					"%i.", (new_size >>
 					vol->cluster_size_bits) > 1 ? "s" : "",
 					err);
 			goto page_err_out;

From ba6d2377c85c9b8a793f455d8c9b6cf31985d70f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cantab.net>
Date: Sun, 26 Jun 2005 22:12:02 +0100
Subject: [PATCH 047/113] NTFS: Fix a nasty deadlock that appeared in recent
 kernels.       The situation: VFS inode X on a mounted ntfs volume is dirty. 
 For       same inode X, the ntfs_inode is dirty and thus corresponding
 on-disk       inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE
 belonging       to the table of inodes, i.e. $MFT, inode 0.       What
 happens:       Process 1: sys_sync()/umount()/whatever...  calls      
 __sync_single_inode() for $MFT -> do_writepages() -> write_page for       the
 dirty page containing the on-disk inode X, the page is now locked       ->
 ntfs_write_mst_block() which clears PageUptodate() on the page to      
 prevent anyone else getting hold of it whilst it does the write out.      
 This is necessary as the on-disk inode needs "fixups" applied before      
 the write to disk which are removed again after the write and      
 PageUptodate is then set again.  It then analyses the page looking       for
 dirty on-disk inodes and when it finds one it calls      
 ntfs_may_write_mft_record() to see if it is safe to write this       on-disk
 inode.  This then calls ilookup5() to check if the       corresponding VFS
 inode is in icache().  This in turn calls ifind()       which waits on the
 inode lock via wait_on_inode whilst holding the       global inode_lock.     
  Process 2: pdflush results in a call to __sync_single_inode for the      
 same VFS inode X on the ntfs volume.  This locks the inode (I_LOCK)      
 then calls write-inode -> ntfs_write_inode -> map_mft_record() ->      
 read_cache_page() for the page (in page cache of table of inodes       $MFT,
 inode 0) containing the on-disk inode.  This page has       PageUptodate()
 clear because of Process 1 (see above) so       read_cache_page() blocks when
 it tries to take the page lock for the       page so it can call
 ntfs_read_page().       Thus Process 1 is holding the page lock on the page
 containing the       on-disk inode X and it is waiting on the inode X to be
 unlocked in       ifind() so it can write the page out and then unlock the
 page.       And Process 2 is holding the inode lock on inode X and is waiting
 for       the page to be unlocked so it can call ntfs_readpage() or discover 
      that Process 1 set PageUptodate() again and use the page.       Thus we
 have a deadlock due to ifind() waiting on the inode lock.       The solution:
 The fix is to use the newly introduced       ilookup5_nowait() which does not
 wait on the inode's lock and hence       avoids the deadlock.  This is safe
 as we do not care about the VFS       inode and only use the fact that it is
 in the VFS inode cache and the       fact that the vfs and ntfs inodes are
 one struct in memory to find       the ntfs inode in memory if present. 
 Also, the ntfs inode has its       own locking so it does not matter if the
 vfs inode is locked.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
---
 Documentation/filesystems/ntfs.txt |  5 +++-
 fs/ntfs/ChangeLog                  | 42 ++++++++++++++++++++++++++++++
 fs/ntfs/mft.c                      | 29 ++++++++++++++-------
 3 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 1415b96ed491..eef4aca0c753 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -451,9 +451,12 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
 	- Implement extension of resident files using the normal file write
 	  code paths, i.e. most very small files can be extended to be a little
 	  bit bigger but not by much.
+	- Add new mount option "disable_sparse".  (See list of mount options
+	  above for details.)
 	- Improve handling of ntfs volumes with errors and strange boot sectors
 	  in particular.
-	- Fix various bugs.
+	- Fix various bugs including a nasty deadlock that appeared in recent
+	  kernels (around 2.6.11-2.6.12 timeframe).
 2.1.22:
 	- Improve handling of ntfs volumes with errors.
 	- Fix various bugs and race conditions.
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 3d2cac4061d6..9709fac6531d 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -132,6 +132,48 @@ ToDo/Notes:
 	  the already mapped runlist fragment which causes
 	  ntfs_mapping_pairs_decompress() to fail and return error.  Update
 	  ntfs_attr_find_vcn_nolock() accordingly.
+	- Fix a nasty deadlock that appeared in recent kernels.
+	  The situation: VFS inode X on a mounted ntfs volume is dirty.  For
+	  same inode X, the ntfs_inode is dirty and thus corresponding on-disk
+	  inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
+	  to the table of inodes, i.e. $MFT, inode 0.
+	  What happens:
+	  Process 1: sys_sync()/umount()/whatever...  calls
+	  __sync_single_inode() for $MFT -> do_writepages() -> write_page for
+	  the dirty page containing the on-disk inode X, the page is now locked
+	  -> ntfs_write_mst_block() which clears PageUptodate() on the page to
+	  prevent anyone else getting hold of it whilst it does the write out.
+	  This is necessary as the on-disk inode needs "fixups" applied before
+	  the write to disk which are removed again after the write and
+	  PageUptodate is then set again.  It then analyses the page looking
+	  for dirty on-disk inodes and when it finds one it calls
+	  ntfs_may_write_mft_record() to see if it is safe to write this
+	  on-disk inode.  This then calls ilookup5() to check if the
+	  corresponding VFS inode is in icache().  This in turn calls ifind()
+	  which waits on the inode lock via wait_on_inode whilst holding the
+	  global inode_lock.
+	  Process 2: pdflush results in a call to __sync_single_inode for the
+	  same VFS inode X on the ntfs volume.  This locks the inode (I_LOCK)
+	  then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
+	  read_cache_page() for the page (in page cache of table of inodes
+	  $MFT, inode 0) containing the on-disk inode.  This page has
+	  PageUptodate() clear because of Process 1 (see above) so
+	  read_cache_page() blocks when it tries to take the page lock for the
+	  page so it can call ntfs_read_page().
+	  Thus Process 1 is holding the page lock on the page containing the
+	  on-disk inode X and it is waiting on the inode X to be unlocked in
+	  ifind() so it can write the page out and then unlock the page.
+	  And Process 2 is holding the inode lock on inode X and is waiting for
+	  the page to be unlocked so it can call ntfs_readpage() or discover
+	  that Process 1 set PageUptodate() again and use the page.
+	  Thus we have a deadlock due to ifind() waiting on the inode lock.
+	  The solution: The fix is to use the newly introduced
+	  ilookup5_nowait() which does not wait on the inode's lock and hence
+	  avoids the deadlock.  This is safe as we do not care about the VFS
+	  inode and only use the fact that it is in the VFS inode cache and the
+	  fact that the vfs and ntfs inodes are one struct in memory to find
+	  the ntfs inode in memory if present.  Also, the ntfs inode has its
+	  own locking so it does not matter if the vfs inode is locked.
 
 2.1.22 - Many bug and race fixes and error handling improvements.
 
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 3d0ba8e60adc..ac9ff39aa834 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -948,20 +948,23 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	na.name_len = 0;
 	na.type = AT_UNUSED;
 	/*
-	 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or
-	 * we deadlock because the inode is already locked by the kernel
-	 * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
-	 * until the inode is unlocked before returning it and it never gets
-	 * unlocked because ntfs_should_write_mft_record() never returns.  )-:
-	 * Fortunately, we have inode 0 pinned in icache for the duration of
-	 * the mount so we can access it directly.
+	 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
+	 * we get here for it rather often.
 	 */
 	if (!mft_no) {
 		/* Balance the below iput(). */
 		vi = igrab(mft_vi);
 		BUG_ON(vi != mft_vi);
-	} else
-		vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na);
+	} else {
+		/*
+		 * Have to use ilookup5_nowait() since ilookup5() waits for the
+		 * inode lock which causes ntfs to deadlock when a concurrent
+		 * inode write via the inode dirty code paths and the page
+		 * dirty code path of the inode dirty code path when writing
+		 * $MFT occurs.
+		 */
+		vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na);
+	}
 	if (vi) {
 		ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
 		/* The inode is in icache. */
@@ -1016,7 +1019,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
 	na.mft_no = MREF_LE(m->base_mft_record);
 	ntfs_debug("Mft record 0x%lx is an extent record.  Looking for base "
 			"inode 0x%lx in icache.", mft_no, na.mft_no);
-	vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na);
+	if (!na.mft_no) {
+		/* Balance the below iput(). */
+		vi = igrab(mft_vi);
+		BUG_ON(vi != mft_vi);
+	} else
+		vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode,
+				&na);
 	if (!vi) {
 		/*
 		 * The base inode is not in icache, write this extent mft

From b38a3ab3d1bb0dc3288f73903d4dc4672b5cd2d0 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 27 Jun 2005 15:35:37 -0500
Subject: [PATCH 048/113] JFS: Code cleanup - getting rid of never-used debug
 code

I'm finally getting around to cleaning out debug code that I've never used.
There has always been code ifdef'ed out by _JFS_DEBUG_DMAP, _JFS_DEBUG_IMAP,
_JFS_DEBUG_DTREE, and _JFS_DEBUG_XTREE, which I have personally never used,
and I doubt that anyone has since the design stage back in OS/2.  There is
also a function, xtGather, that has never been used, and I don't know why it
was ever there.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_dmap.c  | 298 ++-------------------------------------
 fs/jfs/jfs_dtree.c | 199 --------------------------
 fs/jfs/jfs_dtree.h |   7 -
 fs/jfs/jfs_imap.c  | 105 --------------
 fs/jfs/jfs_xtree.c | 340 ---------------------------------------------
 fs/jfs/jfs_xtree.h |   6 -
 6 files changed, 8 insertions(+), 947 deletions(-)

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index cced2fed9d0f..0732f206ca60 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -25,36 +25,6 @@
 #include "jfs_metapage.h"
 #include "jfs_debug.h"
 
-/*
- *	Debug code for double-checking block map
- */
-/* #define	_JFS_DEBUG_DMAP	1 */
-
-#ifdef	_JFS_DEBUG_DMAP
-#define DBINITMAP(size,ipbmap,results) \
-	DBinitmap(size,ipbmap,results)
-#define DBALLOC(dbmap,mapsize,blkno,nblocks) \
-	DBAlloc(dbmap,mapsize,blkno,nblocks)
-#define DBFREE(dbmap,mapsize,blkno,nblocks) \
-	DBFree(dbmap,mapsize,blkno,nblocks)
-#define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \
-	DBAllocCK(dbmap,mapsize,blkno,nblocks)
-#define DBFREECK(dbmap,mapsize,blkno,nblocks) \
-	DBFreeCK(dbmap,mapsize,blkno,nblocks)
-
-static void DBinitmap(s64, struct inode *, u32 **);
-static void DBAlloc(uint *, s64, s64, s64);
-static void DBFree(uint *, s64, s64, s64);
-static void DBAllocCK(uint *, s64, s64, s64);
-static void DBFreeCK(uint *, s64, s64, s64);
-#else
-#define DBINITMAP(size,ipbmap,results)
-#define DBALLOC(dbmap, mapsize, blkno, nblocks)
-#define DBFREE(dbmap, mapsize, blkno, nblocks)
-#define DBALLOCCK(dbmap, mapsize, blkno, nblocks)
-#define DBFREECK(dbmap, mapsize, blkno, nblocks)
-#endif				/* _JFS_DEBUG_DMAP */
-
 /*
  *	SERIALIZATION of the Block Allocation Map.
  *
@@ -242,7 +212,6 @@ int dbMount(struct inode *ipbmap)
 	JFS_SBI(ipbmap->i_sb)->bmap = bmp;
 
 	memset(bmp->db_active, 0, sizeof(bmp->db_active));
-	DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
 
 	/*
 	 * allocate/initialize the bmap lock
@@ -407,16 +376,12 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 		 */
 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
 
-		DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
-
 		/* free the blocks. */
 		if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
 			release_metapage(mp);
 			IREAD_UNLOCK(ipbmap);
 			return (rc);
 		}
-
-		DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
 	}
 
 	/* write the last buffer. */
@@ -775,10 +740,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 		IWRITE_LOCK(ipbmap);
 
 		rc = dbAllocAny(bmp, nblocks, l2nb, results);
-		if (rc == 0) {
-			DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results,
-				nblocks);
-		}
 
 		goto write_unlock;
 	}
@@ -836,8 +797,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 		    != -ENOSPC) {
 			if (rc == 0) {
 				*results = blkno;
-				DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
-					*results, nblocks);
 				mark_metapage_dirty(mp);
 			}
 
@@ -863,11 +822,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 		if ((rc =
 		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
 		    != -ENOSPC) {
-			if (rc == 0) {
-				DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
-					*results, nblocks);
+			if (rc == 0)
 				mark_metapage_dirty(mp);
-			}
 
 			release_metapage(mp);
 			goto read_unlock;
@@ -878,11 +834,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 		 */
 		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
 		    != -ENOSPC) {
-			if (rc == 0) {
-				DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
-					*results, nblocks);
+			if (rc == 0)
 				mark_metapage_dirty(mp);
-			}
 
 			release_metapage(mp);
 			goto read_unlock;
@@ -896,13 +849,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 	 * the same allocation group as the hint.
 	 */
 	IWRITE_LOCK(ipbmap);
-	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results))
-	    != -ENOSPC) {
-		if (rc == 0)
-			DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
-				*results, nblocks);
+	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
 		goto write_unlock;
-	}
+
 	IWRITE_UNLOCK(ipbmap);
 
 
@@ -918,9 +867,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 	 */
 	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
 		rc = dbAllocAny(bmp, nblocks, l2nb, results);
-	if (rc == 0) {
-		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks);
-	}
 
       write_unlock:
 	IWRITE_UNLOCK(ipbmap);
@@ -992,10 +938,9 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 
 	IREAD_UNLOCK(ipbmap);
 
-	if (rc == 0) {
-		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
+	if (rc == 0)
 		mark_metapage_dirty(mp);
-	}
+
 	release_metapage(mp);
 
 	return (rc);
@@ -1144,7 +1089,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 		return -EIO;
 	}
 
-	DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
 	dp = (struct dmap *) mp->data;
 
 	/* try to allocate the blocks immediately following the
@@ -1155,11 +1099,9 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 	IREAD_UNLOCK(ipbmap);
 
 	/* were we successful ? */
-	if (rc == 0) {
-		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno,
-			addnblocks);
+	if (rc == 0)
 		write_metapage(mp);
-	} else
+	else
 		/* we were not successful */
 		release_metapage(mp);
 
@@ -3185,16 +3127,12 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
 		 */
 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
 
-		DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
-
 		/* allocate the blocks. */
 		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
 			release_metapage(mp);
 			IREAD_UNLOCK(ipbmap);
 			return (rc);
 		}
-
-		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
 	}
 
 	/* write the last buffer. */
@@ -4041,223 +3979,3 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
 
 	return (nblocks);
 }
-
-
-#ifdef	_JFS_DEBUG_DMAP
-/*
- *	DBinitmap()
- */
-static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results)
-{
-	int npages;
-	u32 *dbmap, *d;
-	int n;
-	s64 lblkno, cur_block;
-	struct dmap *dp;
-	struct metapage *mp;
-
-	npages = size / 32768;
-	npages += (size % 32768) ? 1 : 0;
-
-	dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap);
-	if (dbmap == NULL)
-		BUG();	/* Not robust since this is only unused debug code */
-
-	for (n = 0, d = dbmap; n < npages; n++, d += 1024)
-		bzero(d, 4096);
-
-	/* Need to initialize from disk map pages
-	 */
-	for (d = dbmap, cur_block = 0; cur_block < size;
-	     cur_block += BPERDMAP, d += LPERDMAP) {
-		lblkno = BLKTODMAP(cur_block,
-				   JFS_SBI(ipbmap->i_sb)->bmap->
-				   db_l2nbperpage);
-		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
-		if (mp == NULL) {
-			jfs_error(ipbmap->i_sb,
-				  "DBinitmap: could not read disk map page");
-			continue;
-		}
-		dp = (struct dmap *) mp->data;
-
-		for (n = 0; n < LPERDMAP; n++)
-			d[n] = le32_to_cpu(dp->wmap[n]);
-
-		release_metapage(mp);
-	}
-
-	*results = dbmap;
-}
-
-
-/*
- *	DBAlloc()
- */
-void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
-{
-	int word, nb, bitno;
-	u32 mask;
-
-	assert(blkno > 0 && blkno < mapsize);
-	assert(nblocks > 0 && nblocks <= mapsize);
-
-	assert(blkno + nblocks <= mapsize);
-
-	dbmap += (blkno / 32);
-	while (nblocks > 0) {
-		bitno = blkno & (32 - 1);
-		nb = min(nblocks, 32 - bitno);
-
-		mask = (0xffffffff << (32 - nb) >> bitno);
-		assert((mask & *dbmap) == 0);
-		*dbmap |= mask;
-
-		dbmap++;
-		blkno += nb;
-		nblocks -= nb;
-	}
-}
-
-
-/*
- *	DBFree()
- */
-static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
-{
-	int word, nb, bitno;
-	u32 mask;
-
-	assert(blkno > 0 && blkno < mapsize);
-	assert(nblocks > 0 && nblocks <= mapsize);
-
-	assert(blkno + nblocks <= mapsize);
-
-	dbmap += (blkno / 32);
-	while (nblocks > 0) {
-		bitno = blkno & (32 - 1);
-		nb = min(nblocks, 32 - bitno);
-
-		mask = (0xffffffff << (32 - nb) >> bitno);
-		assert((mask & *dbmap) == mask);
-		*dbmap &= ~mask;
-
-		dbmap++;
-		blkno += nb;
-		nblocks -= nb;
-	}
-}
-
-
-/*
- *	DBAllocCK()
- */
-static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
-{
-	int word, nb, bitno;
-	u32 mask;
-
-	assert(blkno > 0 && blkno < mapsize);
-	assert(nblocks > 0 && nblocks <= mapsize);
-
-	assert(blkno + nblocks <= mapsize);
-
-	dbmap += (blkno / 32);
-	while (nblocks > 0) {
-		bitno = blkno & (32 - 1);
-		nb = min(nblocks, 32 - bitno);
-
-		mask = (0xffffffff << (32 - nb) >> bitno);
-		assert((mask & *dbmap) == mask);
-
-		dbmap++;
-		blkno += nb;
-		nblocks -= nb;
-	}
-}
-
-
-/*
- *	DBFreeCK()
- */
-static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
-{
-	int word, nb, bitno;
-	u32 mask;
-
-	assert(blkno > 0 && blkno < mapsize);
-	assert(nblocks > 0 && nblocks <= mapsize);
-
-	assert(blkno + nblocks <= mapsize);
-
-	dbmap += (blkno / 32);
-	while (nblocks > 0) {
-		bitno = blkno & (32 - 1);
-		nb = min(nblocks, 32 - bitno);
-
-		mask = (0xffffffff << (32 - nb) >> bitno);
-		assert((mask & *dbmap) == 0);
-
-		dbmap++;
-		blkno += nb;
-		nblocks -= nb;
-	}
-}
-
-
-/*
- *	dbPrtMap()
- */
-static void dbPrtMap(struct bmap * bmp)
-{
-	printk("   mapsize:   %d%d\n", bmp->db_mapsize);
-	printk("   nfree:     %d%d\n", bmp->db_nfree);
-	printk("   numag:     %d\n", bmp->db_numag);
-	printk("   agsize:    %d%d\n", bmp->db_agsize);
-	printk("   agl2size:  %d\n", bmp->db_agl2size);
-	printk("   agwidth:   %d\n", bmp->db_agwidth);
-	printk("   agstart:   %d\n", bmp->db_agstart);
-	printk("   agheigth:  %d\n", bmp->db_agheigth);
-	printk("   aglevel:   %d\n", bmp->db_aglevel);
-	printk("   maxlevel:  %d\n", bmp->db_maxlevel);
-	printk("   maxag:     %d\n", bmp->db_maxag);
-	printk("   agpref:    %d\n", bmp->db_agpref);
-	printk("   l2nbppg:   %d\n", bmp->db_l2nbperpage);
-}
-
-
-/*
- *	dbPrtCtl()
- */
-static void dbPrtCtl(struct dmapctl * dcp)
-{
-	int i, j, n;
-
-	printk("   height:    %08x\n", le32_to_cpu(dcp->height));
-	printk("   leafidx:   %08x\n", le32_to_cpu(dcp->leafidx));
-	printk("   budmin:    %08x\n", dcp->budmin);
-	printk("   nleafs:    %08x\n", le32_to_cpu(dcp->nleafs));
-	printk("   l2nleafs:  %08x\n", le32_to_cpu(dcp->l2nleafs));
-
-	printk("\n Tree:\n");
-	for (i = 0; i < CTLLEAFIND; i += 8) {
-		n = min(8, CTLLEAFIND - i);
-
-		for (j = 0; j < n; j++)
-			printf("  [%03x]: %02x", i + j,
-			       (char) dcp->stree[i + j]);
-		printf("\n");
-	}
-
-	printk("\n Tree Leaves:\n");
-	for (i = 0; i < LPERCTL; i += 8) {
-		n = min(8, LPERCTL - i);
-
-		for (j = 0; j < n; j++)
-			printf("  [%03x]: %02x",
-			       i + j,
-			       (char) dcp->stree[i + j + CTLLEAFIND]);
-		printf("\n");
-	}
-}
-#endif				/* _JFS_DEBUG_DMAP */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 8676aee3ae48..73b5fc7eda80 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -4554,202 +4554,3 @@ int dtModify(tid_t tid, struct inode *ip,
 
 	return 0;
 }
-
-#ifdef _JFS_DEBUG_DTREE
-/*
- *	dtDisplayTree()
- *
- * function: traverse forward
- */
-int dtDisplayTree(struct inode *ip)
-{
-	int rc;
-	struct metapage *mp;
-	dtpage_t *p;
-	s64 bn, pbn;
-	int index, lastindex, v, h;
-	pxd_t *xd;
-	struct btstack btstack;
-	struct btframe *btsp;
-	struct btframe *parent;
-	u8 *stbl;
-	int psize = 256;
-
-	printk("display B+-tree.\n");
-
-	/* clear stack */
-	btsp = btstack.stack;
-
-	/*
-	 * start with root
-	 *
-	 * root resides in the inode
-	 */
-	bn = 0;
-	v = h = 0;
-
-	/*
-	 * first access of each page:
-	 */
-      newPage:
-	DT_GETPAGE(ip, bn, mp, psize, p, rc);
-	if (rc)
-		return rc;
-
-	/* process entries forward from first index */
-	index = 0;
-	lastindex = p->header.nextindex - 1;
-
-	if (p->header.flag & BT_INTERNAL) {
-		/*
-		 * first access of each internal page
-		 */
-		printf("internal page ");
-		dtDisplayPage(ip, bn, p);
-
-		goto getChild;
-	} else {		/* (p->header.flag & BT_LEAF) */
-
-		/*
-		 * first access of each leaf page
-		 */
-		printf("leaf page ");
-		dtDisplayPage(ip, bn, p);
-
-		/*
-		 * process leaf page entries
-		 *
-		 for ( ; index <= lastindex; index++)
-		 {
-		 }
-		 */
-
-		/* unpin the leaf page */
-		DT_PUTPAGE(mp);
-	}
-
-	/*
-	 * go back up to the parent page
-	 */
-      getParent:
-	/* pop/restore parent entry for the current child page */
-	if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
-		/* current page must have been root */
-		return;
-
-	/*
-	 * parent page scan completed
-	 */
-	if ((index = parent->index) == (lastindex = parent->lastindex)) {
-		/* go back up to the parent page */
-		goto getParent;
-	}
-
-	/*
-	 * parent page has entries remaining
-	 */
-	/* get back the parent page */
-	bn = parent->bn;
-	/* v = parent->level; */
-	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-	if (rc)
-		return rc;
-
-	/* get next parent entry */
-	index++;
-
-	/*
-	 * internal page: go down to child page of current entry
-	 */
-      getChild:
-	/* push/save current parent entry for the child page */
-	btsp->bn = pbn = bn;
-	btsp->index = index;
-	btsp->lastindex = lastindex;
-	/* btsp->level = v; */
-	/* btsp->node = h; */
-	++btsp;
-
-	/* get current entry for the child page */
-	stbl = DT_GETSTBL(p);
-	xd = (pxd_t *) & p->slot[stbl[index]];
-
-	/*
-	 * first access of each internal entry:
-	 */
-
-	/* get child page */
-	bn = addressPXD(xd);
-	psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize;
-
-	printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn);
-	v++;
-	h = index;
-
-	/* release parent page */
-	DT_PUTPAGE(mp);
-
-	/* process the child page */
-	goto newPage;
-}
-
-
-/*
- *	dtDisplayPage()
- *
- * function: display page
- */
-int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p)
-{
-	int rc;
-	struct metapage *mp;
-	struct ldtentry *lh;
-	struct idtentry *ih;
-	pxd_t *xd;
-	int i, j;
-	u8 *stbl;
-	wchar_t name[JFS_NAME_MAX + 1];
-	struct component_name key = { 0, name };
-	int freepage = 0;
-
-	if (p == NULL) {
-		freepage = 1;
-		DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-		if (rc)
-			return rc;
-	}
-
-	/* display page control */
-	printk("bn:0x%Lx flag:0x%08x nextindex:%d\n",
-	       bn, p->header.flag, p->header.nextindex);
-
-	/* display entries */
-	stbl = DT_GETSTBL(p);
-	for (i = 0, j = 1; i < p->header.nextindex; i++, j++) {
-		dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag);
-		key.name[key.namlen] = '\0';
-		if (p->header.flag & BT_LEAF) {
-			lh = (struct ldtentry *) & p->slot[stbl[i]];
-			printf("\t[%d] %s:%d", i, key.name,
-			       le32_to_cpu(lh->inumber));
-		} else {
-			ih = (struct idtentry *) & p->slot[stbl[i]];
-			xd = (pxd_t *) ih;
-			bn = addressPXD(xd);
-			printf("\t[%d] %s:0x%Lx", i, key.name, bn);
-		}
-
-		if (j == 4) {
-			printf("\n");
-			j = 0;
-		}
-	}
-
-	printf("\n");
-
-	if (freepage)
-		DT_PUTPAGE(mp);
-
-	return 0;
-}
-#endif				/* _JFS_DEBUG_DTREE */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index 273a80130c9d..13e4fdf07724 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -269,11 +269,4 @@ extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key,
 		    ino_t * orig_ino, ino_t new_ino, int flag);
 
 extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
-
-#ifdef  _JFS_DEBUG_DTREE
-extern int dtDisplayTree(struct inode *ip);
-
-extern int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p);
-#endif				/* _JFS_DEBUG_DTREE */
-
 #endif				/* !_H_JFS_DTREE */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 971af2977eff..4021d46da7e3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -86,25 +86,6 @@ static int diIAGRead(struct inomap * imap, int, struct metapage **);
 static int copy_from_dinode(struct dinode *, struct inode *);
 static void copy_to_dinode(struct dinode *, struct inode *);
 
-/*
- *	debug code for double-checking inode map
- */
-/* #define	_JFS_DEBUG_IMAP	1 */
-
-#ifdef	_JFS_DEBUG_IMAP
-#define DBG_DIINIT(imap)	DBGdiInit(imap)
-#define DBG_DIALLOC(imap, ino)	DBGdiAlloc(imap, ino)
-#define DBG_DIFREE(imap, ino)	DBGdiFree(imap, ino)
-
-static void *DBGdiInit(struct inomap * imap);
-static void DBGdiAlloc(struct inomap * imap, ino_t ino);
-static void DBGdiFree(struct inomap * imap, ino_t ino);
-#else
-#define DBG_DIINIT(imap)
-#define DBG_DIALLOC(imap, ino)
-#define DBG_DIFREE(imap, ino)
-#endif				/* _JFS_DEBUG_IMAP */
-
 /*
  * NAME:        diMount()
  *
@@ -188,8 +169,6 @@ int diMount(struct inode *ipimap)
 	imap->im_ipimap = ipimap;
 	JFS_IP(ipimap)->i_imap = imap;
 
-//      DBG_DIINIT(imap);
-
 	return (0);
 }
 
@@ -1043,7 +1022,6 @@ int diFree(struct inode *ip)
 		/* update the bitmap.
 		 */
 		iagp->wmap[extno] = cpu_to_le32(bitmap);
-		DBG_DIFREE(imap, inum);
 
 		/* update the free inode counts at the iag, ag and
 		 * map level.
@@ -1231,7 +1209,6 @@ int diFree(struct inode *ip)
 		jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
 	}
 	iagp->wmap[extno] = 0;
-	DBG_DIFREE(imap, inum);
 	PXDlength(&iagp->inoext[extno], 0);
 	PXDaddress(&iagp->inoext[extno], 0);
 
@@ -1350,7 +1327,6 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 
 	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
-	DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
 	jfs_ip->ixpxd = iagp->inoext[extno];
 	jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
 	jfs_ip->active_ag = -1;
@@ -3185,84 +3161,3 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
 		dip->di_rdev = cpu_to_le32(jfs_ip->dev);
 }
-
-#ifdef	_JFS_DEBUG_IMAP
-/*
- *	DBGdiInit()
- */
-static void *DBGdiInit(struct inomap * imap)
-{
-	u32 *dimap;
-	int size;
-	size = 64 * 1024;
-	if ((dimap = (u32 *) xmalloc(size, L2PSIZE, kernel_heap)) == NULL)
-		assert(0);
-	bzero((void *) dimap, size);
-	imap->im_DBGdimap = dimap;
-}
-
-/*
- *	DBGdiAlloc()
- */
-static void DBGdiAlloc(struct inomap * imap, ino_t ino)
-{
-	u32 *dimap = imap->im_DBGdimap;
-	int w, b;
-	u32 m;
-	w = ino >> 5;
-	b = ino & 31;
-	m = 0x80000000 >> b;
-	assert(w < 64 * 256);
-	if (dimap[w] & m) {
-		printk("DEBUG diAlloc: duplicate alloc ino:0x%x\n", ino);
-	}
-	dimap[w] |= m;
-}
-
-/*
- *	DBGdiFree()
- */
-static void DBGdiFree(struct inomap * imap, ino_t ino)
-{
-	u32 *dimap = imap->im_DBGdimap;
-	int w, b;
-	u32 m;
-	w = ino >> 5;
-	b = ino & 31;
-	m = 0x80000000 >> b;
-	assert(w < 64 * 256);
-	if ((dimap[w] & m) == 0) {
-		printk("DEBUG diFree: duplicate free ino:0x%x\n", ino);
-	}
-	dimap[w] &= ~m;
-}
-
-static void dump_cp(struct inomap * ipimap, char *function, int line)
-{
-	printk("\n* ********* *\nControl Page %s %d\n", function, line);
-	printk("FreeIAG %d\tNextIAG %d\n", ipimap->im_freeiag,
-	       ipimap->im_nextiag);
-	printk("NumInos %d\tNumFree %d\n",
-	       atomic_read(&ipimap->im_numinos),
-	       atomic_read(&ipimap->im_numfree));
-	printk("AG InoFree %d\tAG ExtFree %d\n",
-	       ipimap->im_agctl[0].inofree, ipimap->im_agctl[0].extfree);
-	printk("AG NumInos %d\tAG NumFree %d\n",
-	       ipimap->im_agctl[0].numinos, ipimap->im_agctl[0].numfree);
-}
-
-static void dump_iag(struct iag * iag, char *function, int line)
-{
-	printk("\n* ********* *\nIAG %s %d\n", function, line);
-	printk("IagNum %d\tIAG Free %d\n", le32_to_cpu(iag->iagnum),
-	       le32_to_cpu(iag->iagfree));
-	printk("InoFreeFwd %d\tInoFreeBack %d\n",
-	       le32_to_cpu(iag->inofreefwd),
-	       le32_to_cpu(iag->inofreeback));
-	printk("ExtFreeFwd %d\tExtFreeBack %d\n",
-	       le32_to_cpu(iag->extfreefwd),
-	       le32_to_cpu(iag->extfreeback));
-	printk("NFreeInos %d\tNFreeExts %d\n", le32_to_cpu(iag->nfreeinos),
-	       le32_to_cpu(iag->nfreeexts));
-}
-#endif				/* _JFS_DEBUG_IMAP */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 31b34db4519e..a7fe2f2b969f 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -135,14 +135,6 @@ static int xtSearchNode(struct inode *ip,
 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 #endif				/*  _STILL_TO_PORT */
 
-/* External references */
-
-/*
- *      debug control
- */
-/*      #define _JFS_DEBUG_XTREE        1 */
-
-
 /*
  *      xtLookup()
  *
@@ -4140,338 +4132,6 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 	return 0;
 }
 
-
-#ifdef _JFS_DEBUG_XTREE
-/*
- *      xtDisplayTree()
- *
- * function: traverse forward
- */
-int xtDisplayTree(struct inode *ip)
-{
-	int rc = 0;
-	struct metapage *mp;
-	xtpage_t *p;
-	s64 bn, pbn;
-	int index, lastindex, v, h;
-	xad_t *xad;
-	struct btstack btstack;
-	struct btframe *btsp;
-	struct btframe *parent;
-
-	printk("display B+-tree.\n");
-
-	/* clear stack */
-	btsp = btstack.stack;
-
-	/*
-	 * start with root
-	 *
-	 * root resides in the inode
-	 */
-	bn = 0;
-	v = h = 0;
-
-	/*
-	 * first access of each page:
-	 */
-      getPage:
-	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-	if (rc)
-		return rc;
-
-	/* process entries forward from first index */
-	index = XTENTRYSTART;
-	lastindex = le16_to_cpu(p->header.nextindex) - 1;
-
-	if (p->header.flag & BT_INTERNAL) {
-		/*
-		 * first access of each internal page
-		 */
-		goto getChild;
-	} else {		/* (p->header.flag & BT_LEAF) */
-
-		/*
-		 * first access of each leaf page
-		 */
-		printf("leaf page ");
-		xtDisplayPage(ip, bn, p);
-
-		/* unpin the leaf page */
-		XT_PUTPAGE(mp);
-	}
-
-	/*
-	 * go back up to the parent page
-	 */
-      getParent:
-	/* pop/restore parent entry for the current child page */
-	if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
-		/* current page must have been root */
-		return;
-
-	/*
-	 * parent page scan completed
-	 */
-	if ((index = parent->index) == (lastindex = parent->lastindex)) {
-		/* go back up to the parent page */
-		goto getParent;
-	}
-
-	/*
-	 * parent page has entries remaining
-	 */
-	/* get back the parent page */
-	bn = parent->bn;
-	/* v = parent->level; */
-	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-	if (rc)
-		return rc;
-
-	/* get next parent entry */
-	index++;
-
-	/*
-	 * internal page: go down to child page of current entry
-	 */
-      getChild:
-	/* push/save current parent entry for the child page */
-	btsp->bn = pbn = bn;
-	btsp->index = index;
-	btsp->lastindex = lastindex;
-	/* btsp->level = v; */
-	/* btsp->node = h; */
-	++btsp;
-
-	/* get child page */
-	xad = &p->xad[index];
-	bn = addressXAD(xad);
-
-	/*
-	 * first access of each internal entry:
-	 */
-	/* release parent page */
-	XT_PUTPAGE(mp);
-
-	printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
-	       (ulong) bn);
-	v++;
-	h = index;
-
-	/* process the child page */
-	goto getPage;
-}
-
-
-/*
- *      xtDisplayPage()
- *
- * function: display page
- */
-int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
-{
-	int rc = 0;
-	xad_t *xad;
-	s64 xaddr, xoff;
-	int xlen, i, j;
-
-	/* display page control */
-	printf("bn:0x%lx flag:0x%x nextindex:%d\n",
-	       (ulong) bn, p->header.flag,
-	       le16_to_cpu(p->header.nextindex));
-
-	/* display entries */
-	xad = &p->xad[XTENTRYSTART];
-		for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
-		     i++, xad++, j++) {
-			xoff = offsetXAD(xad);
-			xaddr = addressXAD(xad);
-			xlen = lengthXAD(xad);
-			printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
-			       (ulong) xaddr, xlen);
-
-			if (j == 4) {
-				printf("\n");
-				j = 0;
-		}
-	}
-
-	printf("\n");
-}
-#endif				/* _JFS_DEBUG_XTREE */
-
-
-#ifdef _JFS_WIP
-/*
- *      xtGather()
- *
- * function:
- *      traverse for allocation acquiring tlock at commit time
- *      (vs at the time of update) logging backward top down
- *
- * note:
- *      problem - establishing that all new allocation have been
- *      processed both for append and random write in sparse file
- *      at the current entry at the current subtree root page
- *
- */
-int xtGather(btree_t *t)
-{
-	int rc = 0;
-	xtpage_t *p;
-	u64 bn;
-	int index;
-	btentry_t *e;
-	struct btstack btstack;
-	struct btsf *parent;
-
-	/* clear stack */
-	BT_CLR(&btstack);
-
-	/*
-	 * start with root
-	 *
-	 * root resides in the inode
-	 */
-	bn = 0;
-	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-	if (rc)
-		return rc;
-
-	/* new root is NOT pointed by a new entry
-	   if (p->header.flag & NEW)
-	   allocate new page lock;
-	   write a NEWPAGE log;
-	 */
-
-      dopage:
-	/*
-	 * first access of each page:
-	 */
-	/* process entries backward from last index */
-	index = le16_to_cpu(p->header.nextindex) - 1;
-
-	if (p->header.flag & BT_LEAF) {
-		/*
-		 * first access of each leaf page
-		 */
-		/* process leaf page entries backward */
-		for (; index >= XTENTRYSTART; index--) {
-			e = &p->xad[index];
-			/*
-			 * if newpage, log NEWPAGE.
-			 *
-			 if (e->flag & XAD_NEW) {
-			 nfound =+ entry->length;
-			 update current page lock for the entry;
-			 newpage(entry);
-			 *
-			 * if moved, log move.
-			 *
-			 } else if (e->flag & XAD_MOVED) {
-			 reset flag;
-			 update current page lock for the entry;
-			 }
-			 */
-		}
-
-		/* unpin the leaf page */
-		XT_PUTPAGE(mp);
-
-		/*
-		 * go back up to the parent page
-		 */
-	      getParent:
-		/* restore parent entry for the current child page */
-		if ((parent = BT_POP(&btstack)) == NULL)
-			/* current page must have been root */
-			return 0;
-
-		if ((index = parent->index) == XTENTRYSTART) {
-			/*
-			 * parent page scan completed
-			 */
-			/* go back up to the parent page */
-			goto getParent;
-		} else {
-			/*
-			 * parent page has entries remaining
-			 */
-			/* get back the parent page */
-			bn = parent->bn;
-			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-			if (rc)
-				return -EIO;
-
-			/* first subroot page which
-			 * covers all new allocated blocks
-			 * itself not new/modified.
-			 * (if modified from split of descendent,
-			 * go down path of split page)
-
-			 if (nfound == nnew &&
-			 !(p->header.flag & (NEW | MOD)))
-			 exit scan;
-			 */
-
-			/* process parent page entries backward */
-			index--;
-		}
-	} else {
-		/*
-		 * first access of each internal page
-		 */
-	}
-
-	/*
-	 * internal page: go down to child page of current entry
-	 */
-
-	/* save current parent entry for the child page */
-	BT_PUSH(&btstack, bn, index);
-
-	/* get current entry for the child page */
-	e = &p->xad[index];
-
-	/*
-	 * first access of each internal entry:
-	 */
-	/*
-	 * if new entry, log btree_tnewentry.
-	 *
-	 if (e->flag & XAD_NEW)
-	 update parent page lock for the entry;
-	 */
-
-	/* release parent page */
-	XT_PUTPAGE(mp);
-
-	/* get child page */
-	bn = e->bn;
-	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
-	if (rc)
-		return rc;
-
-	/*
-	 * first access of each non-root page:
-	 */
-	/*
-	 * if new, log btree_newpage.
-	 *
-	 if (p->header.flag & NEW)
-	 allocate new page lock;
-	 write a NEWPAGE log (next, prev);
-	 */
-
-	/* process the child page */
-	goto dopage;
-
-      out:
-	return 0;
-}
-#endif				/* _JFS_WIP */
-
-
 #ifdef CONFIG_JFS_STATISTICS
 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
 		    int *eof, void *data)
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index a69784254fe7..af668a80b40f 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -131,10 +131,4 @@ extern int xtRelocate(tid_t tid, struct inode *ip,
 extern int xtAppend(tid_t tid,
 		    struct inode *ip, int xflag, s64 xoff, int maxblocks,
 		    int *xlenp, s64 * xaddrp, int flag);
-
-#ifdef  _JFS_DEBUG_XTREE
-extern int xtDisplayTree(struct inode *ip);
-extern int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p);
-#endif				/* _JFS_DEBUG_XTREE */
-
 #endif				/* !_H_JFS_XTREE */

From 7343c9ba8ed9d504cd2daf960da58971c6631601 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Mon, 11 Jul 2005 10:17:06 +0100
Subject: [PATCH 049/113] [PATCH] ARM: remove linux/version.h include from
 arch/arm

Changing CONFIG_LOCALVERSION rebuilds too much, for no appearent reason.

Signed-off-by: Olaf Hering <olh@suse.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-omap1/leds-h2p2-debug.c | 1 -
 arch/arm/nwfpe/fpmodule.c             | 1 -
 arch/arm/plat-omap/ocpi.c             | 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/arm/mach-omap1/leds-h2p2-debug.c b/arch/arm/mach-omap1/leds-h2p2-debug.c
index 6e98290cca5c..ec0d8285f243 100644
--- a/arch/arm/mach-omap1/leds-h2p2-debug.c
+++ b/arch/arm/mach-omap1/leds-h2p2-debug.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sched.h>
-#include <linux/version.h>
 
 #include <asm/io.h>
 #include <asm/hardware.h>
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
index a806fea5c3ed..a8efcf34888e 100644
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -24,7 +24,6 @@
 #include "fpa11.h"
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/config.h>
 
 /* XXX */
diff --git a/arch/arm/plat-omap/ocpi.c b/arch/arm/plat-omap/ocpi.c
index 1fb16f9edfd5..2ede2ee8cae4 100644
--- a/arch/arm/plat-omap/ocpi.c
+++ b/arch/arm/plat-omap/ocpi.c
@@ -25,7 +25,6 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>

From d12734d14e5602816f0b16b17a8cef5ea70afb5a Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 11 Jul 2005 17:38:36 +0100
Subject: [PATCH 050/113] [PATCH] ARM SMP: Rename cpu_present_mask to
 cpu_possible_map

The kernel's terminology for this is cpu_possible_map not
cpu_present_mask.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp.c              | 4 ++--
 arch/arm/mach-integrator/platsmp.c | 2 +-
 include/asm-arm/smp.h              | 3 ---
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index a931409c8fe4..44a27b2a3c29 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -36,7 +36,7 @@
  * The present bitmask indicates that the CPU is physically present.
  * The online bitmask indicates that the CPU is up and running.
  */
-cpumask_t cpu_present_mask;
+cpumask_t cpu_possible_map;
 cpumask_t cpu_online_map;
 
 /*
@@ -235,7 +235,7 @@ void __init smp_prepare_boot_cpu(void)
 {
 	unsigned int cpu = smp_processor_id();
 
-	cpu_set(cpu, cpu_present_mask);
+	cpu_set(cpu, cpu_possible_map);
 	cpu_set(cpu, cpu_online_map);
 }
 
diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c
index ead15dfcb53d..88f40a5189cd 100644
--- a/arch/arm/mach-integrator/platsmp.c
+++ b/arch/arm/mach-integrator/platsmp.c
@@ -178,7 +178,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 * be present.
 	 */
 	for (i = 0; i < max_cpus; i++) {
-		cpu_set(i, cpu_present_mask);
+		cpu_set(i, cpu_possible_map);
 	}
 
 	/*
diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h
index 6c6c60adbbaa..dbb4d859c586 100644
--- a/include/asm-arm/smp.h
+++ b/include/asm-arm/smp.h
@@ -23,9 +23,6 @@
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
-extern cpumask_t cpu_present_mask;
-#define cpu_possible_map cpu_present_mask
-
 /*
  * at the moment, there's not a big penalty for changing CPUs
  * (the >big< penalty is running SMP in the first place)

From e11b2236eace94ad9a2e421904742e83976405ed Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 11 Jul 2005 19:26:31 +0100
Subject: [PATCH 051/113] [PATCH] ARM SMP: We list IRQs for present CPUs, not
 online CPUs

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 44a27b2a3c29..78c24a1266b1 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -355,7 +355,7 @@ void show_ipi_list(struct seq_file *p)
 
 	seq_puts(p, "IPI:");
 
-	for_each_online_cpu(cpu)
+	for_each_present_cpu(cpu)
 		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
 
 	seq_putc(p, '\n');

From 73eb7d9e8cfd16813eec94d0ec8fa2a5262a85cc Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 11 Jul 2005 19:42:58 +0100
Subject: [PATCH 052/113] [PATCH] ARM SMP: Initialise cpu_present_map

Rather than relying on the fixup code in init/main.c, explicitly
initialise cpu_present_map.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp.c              | 1 +
 arch/arm/mach-integrator/platsmp.c | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 78c24a1266b1..7ae45c3fc834 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -236,6 +236,7 @@ void __init smp_prepare_boot_cpu(void)
 	unsigned int cpu = smp_processor_id();
 
 	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
 	cpu_set(cpu, cpu_online_map);
 }
 
diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c
index 88f40a5189cd..2ba025777098 100644
--- a/arch/arm/mach-integrator/platsmp.c
+++ b/arch/arm/mach-integrator/platsmp.c
@@ -174,11 +174,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		max_cpus = ncores;
 
 	/*
-	 * Initialise the present mask - this tells us which CPUs should
-	 * be present.
+	 * Initialise the possible/present maps.
+	 * cpu_possible_map describes the set of CPUs which may be present
+	 * cpu_present_map describes the set of CPUs populated
 	 */
 	for (i = 0; i < max_cpus; i++) {
 		cpu_set(i, cpu_possible_map);
+		cpu_set(i, cpu_present_map);
 	}
 
 	/*

From 1ca70351af02b1f0eb9cd2e7eb7a547f8ad5d893 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@tglx.tec.linutronix.de>
Date: Tue, 12 Jul 2005 17:51:06 +0200
Subject: [PATCH 053/113] [MTD] Make XIP support depend on CONFIG_ARM

ARM is the only known user of this at the moment.
Prevent allyes builds for other archs from failing

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/mtd/chips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index b5dc59389bb3..df95d2158b16 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -300,7 +300,7 @@ config MTD_JEDEC
 
 config MTD_XIP
 	bool "XIP aware MTD support"
-	depends on !SMP && (MTD_CFI_INTELEXT || MTD_CFI_AMDSTD) && EXPERIMENTAL
+	depends on !SMP && (MTD_CFI_INTELEXT || MTD_CFI_AMDSTD) && EXPERIMENTAL && ARM
 	default y if XIP_KERNEL
 	help
 	  This allows MTD support to work with flash memory which is also

From 514604c6d1779c55d3e97dc4e9a71c117c1ccbcd Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Thu, 7 Jul 2005 16:59:00 -0700
Subject: [PATCH 054/113] [IA64] pcibus_to_node implementation for IA64

pcibus_to_node provides a way for the Linux kernel to identify to which
node a certain pcibus connects to. Allocations of control structures
for devices can then be made on the node where the pci bus is located
to allow local access during interrupt and other device manipulation.

This patch provides a new "node" field in the the pci_controller
structure. The node field will be set based on ACPI information (thanks
to Alex Williamson  <alex.williamson@hp.com for that piece).

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/pci/pci.c         | 12 ++++++++++--
 include/asm-ia64/pci.h      |  1 +
 include/asm-ia64/topology.h |  5 +++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 720a861f88be..54d9ed444e4a 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -157,6 +157,7 @@ alloc_pci_controller (int seg)
 
 	memset(controller, 0, sizeof(*controller));
 	controller->segment = seg;
+	controller->node = -1;
 	return controller;
 }
 
@@ -288,6 +289,7 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 	unsigned int windows = 0;
 	struct pci_bus *pbus;
 	char *name;
+	int pxm;
 
 	controller = alloc_pci_controller(domain);
 	if (!controller)
@@ -295,10 +297,16 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 
 	controller->acpi_handle = device->handle;
 
+	pxm = acpi_get_pxm(controller->acpi_handle);
+#ifdef CONFIG_NUMA
+	if (pxm >= 0)
+		controller->node = pxm_to_nid_map[pxm];
+#endif
+
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
 			&windows);
-	controller->window = kmalloc(sizeof(*controller->window) * windows,
-			GFP_KERNEL);
+	controller->window = kmalloc_node(sizeof(*controller->window) * windows,
+			GFP_KERNEL, controller->node);
 	if (!controller->window)
 		goto out2;
 
diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h
index 0c4c5d801d3f..8b7237cc84c1 100644
--- a/include/asm-ia64/pci.h
+++ b/include/asm-ia64/pci.h
@@ -128,6 +128,7 @@ struct pci_controller {
 	void *acpi_handle;
 	void *iommu;
 	int segment;
+	int node;		/* nearest node with memory or -1 for global allocation */
 
 	unsigned int windows;
 	struct pci_window *window;
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 4e64c2a6b369..399bc29729fd 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -40,6 +40,11 @@
  */
 #define node_to_first_cpu(node) (__ffs(node_to_cpumask(node)))
 
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
 void build_cpu_to_node_map(void);
 
 #define SD_CPU_INIT (struct sched_domain) {		\

From 60a762b6a6dec17cc4339b60154902fd04c2f9f2 Mon Sep 17 00:00:00 2001
From: Greg Edwards <edwardsg@sgi.com>
Date: Tue, 28 Jun 2005 11:13:00 -0700
Subject: [PATCH 055/113] [IA64] remove CONFIG_IA64_SGI_SN_SIM

This patch removes the CONFIG_IA64_SGI_SN_SIM option entirely, allowing
any kernel bootable on sn2 to also be booted in the simulator.

Boot tested on Altix and HP rx2600.

Signed-off-by: Greg Edwards <edwardsg@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/Kconfig               | 7 -------
 arch/ia64/configs/sn2_defconfig | 1 -
 include/asm-ia64/sn/simulator.h | 9 +--------
 3 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 01b78e7f992e..bfdd062dd827 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -220,13 +220,6 @@ config IOSAPIC
 	depends on !IA64_HP_SIM
 	default y
 
-config IA64_SGI_SN_SIM
-	bool "SGI Medusa Simulator Support"
-	depends on IA64_SGI_SN2 || IA64_GENERIC
-	help
-	  If you are compiling a kernel that will run under SGI's IA-64
-	  simulator (Medusa) then say Y, otherwise say N.
-
 config IA64_SGI_SN_XP
 	tristate "Support communication between SGI SSIs"
 	select IA64_UNCACHED_ALLOCATOR
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index c05613980300..04d0b00a2b8c 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -81,7 +81,6 @@ CONFIG_HOLES_IN_ZONE=y
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
 # CONFIG_IA64_CYCLONE is not set
 CONFIG_IOSAPIC=y
-CONFIG_IA64_SGI_SN_SIM=y
 CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=512
diff --git a/include/asm-ia64/sn/simulator.h b/include/asm-ia64/sn/simulator.h
index cf770e246af5..16a48b5a039c 100644
--- a/include/asm-ia64/sn/simulator.h
+++ b/include/asm-ia64/sn/simulator.h
@@ -13,16 +13,9 @@
 #define SNMAGIC 0xaeeeeeee8badbeefL
 #define IS_MEDUSA()			({long sn; asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); sn == SNMAGIC;})
 
-#ifdef CONFIG_IA64_SGI_SN_SIM
 #define SIMULATOR_SLEEP()		asm("nop.i 0x8beef")
-#define IS_RUNNING_ON_SIMULATOR() 	(sn_prom_type)
+#define IS_RUNNING_ON_SIMULATOR()	(sn_prom_type)
 #define IS_RUNNING_ON_FAKE_PROM()	(sn_prom_type == 2)
 extern int sn_prom_type;		/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
-#else
-#define IS_RUNNING_ON_SIMULATOR()	(0)
-#define IS_RUNNING_ON_FAKE_PROM()	(0)
-#define SIMULATOR_SLEEP()
-
-#endif
 
 #endif /* _ASM_IA64_SN_SIMULATOR_H */

From 336d2ff7115bd4391108a16f476c739cb4a505b0 Mon Sep 17 00:00:00 2001
From: "Artem B. Bityuckiy" <dedekind@infradead.org>
Date: Thu, 7 Jul 2005 16:45:32 +0100
Subject: [PATCH 056/113] [JFFS2] Avoid alloc/dealloc for zero sized nodes

Signed-off-by: Artem B. Bityuckiy <dedekind@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/readinode.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 081656c1d49e..8f0be5557ae9 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: readinode.c,v 1.120 2005/07/05 21:03:07 dwmw2 Exp $
+ * $Id: readinode.c,v 1.124 2005/07/07 15:45:29 dedekind Exp $
  *
  */
 
@@ -151,6 +151,9 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
 
 	D1(printk(KERN_DEBUG "jffs2_add_full_dnode_to_inode(ino #%u, f %p, fn %p)\n", f->inocache->ino, f, fn));
 
+	if (unlikely(!fn->size))
+		return 0;
+
 	newfrag = jffs2_alloc_node_frag();
 	if (unlikely(!newfrag))
 		return -ENOMEM;
@@ -158,11 +161,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
 	D2(printk(KERN_DEBUG "adding node %04x-%04x @0x%08x on flash, newfrag *%p\n",
 		  fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag));
 	
-	if (unlikely(!fn->size)) {
-		jffs2_free_node_frag(newfrag);
-		return 0;
-	}
-
 	newfrag->ofs = fn->ofs;
 	newfrag->size = fn->size;
 	newfrag->node = fn;

From 86ffc0d5f5f0c8a6fe70eccade3f7ae8e9630edf Mon Sep 17 00:00:00 2001
From: "Artem B. Bityuckiy" <dedekind@infradead.org>
Date: Sun, 10 Jul 2005 14:13:58 +0100
Subject: [PATCH 057/113] [JFFS2] Remove needless variable initialization

Signed-off-by: Artem B. Bityuckiy <dedekind@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/readinode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 8f0be5557ae9..5b2a83599d73 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: readinode.c,v 1.124 2005/07/07 15:45:29 dedekind Exp $
+ * $Id: readinode.c,v 1.125 2005/07/10 13:13:55 dedekind Exp $
  *
  */
 
@@ -558,7 +558,6 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
 		}
 	next_tn:
 		BUG_ON(rb->rb_left);
-		repl_rb = NULL;
 		if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
 			/* We were then left-hand child of our parent. We need
 			   to move our own right-hand child into our place. */

From e4fef6618992bea48b7856ebbd236796b744964f Mon Sep 17 00:00:00 2001
From: "Artem B. Bityuckiy" <dedekind@infradead.org>
Date: Sun, 10 Jul 2005 16:15:36 +0100
Subject: [PATCH 058/113] [JFFS2] Rename function and update comments

We recently changed the method of collecting and sorting of
tmp_dnode objects to use a temporary RB-tree instead of a
temporary list. Rename function and update comments.

Signed-off-by: Artem B. Bityuckiy <dedekind@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/nodelist.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index c7bbdeec93a6..4991c348f6ec 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: nodelist.c,v 1.97 2005/07/06 15:18:41 dwmw2 Exp $
+ * $Id: nodelist.c,v 1.98 2005/07/10 15:15:32 dedekind Exp $
  *
  */
 
@@ -55,11 +55,11 @@ void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new
 	});
 }
 
-/* Put a new tmp_dnode_info into the list, keeping the list in 
-   order of increasing version
-*/
-
-static void jffs2_add_tn_to_list(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
+/* 
+ * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in 
+ * order of increasing version.
+ */
+static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
 {
 	struct rb_node **p = &list->rb_node;
 	struct rb_node * parent = NULL;
@@ -420,7 +420,7 @@ int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			D1(printk(KERN_DEBUG "dnode @%08x: ver %u, offset %04x, dsize %04x\n",
 				  ref_offset(ref), je32_to_cpu(node.i.version),
 				  je32_to_cpu(node.i.offset), je32_to_cpu(node.i.dsize)));
-			jffs2_add_tn_to_list(tn, &ret_tn);
+			jffs2_add_tn_to_tree(tn, &ret_tn);
 			break;
 
 		default:

From b62205986a73a40e4fc174d2ef668e3895e925bc Mon Sep 17 00:00:00 2001
From: "Artem B. Bityuckiy" <dedekind@infradead.org>
Date: Tue, 12 Jul 2005 17:37:12 +0100
Subject: [PATCH 059/113] [JFFS2] Init locks early during mount

In case of a mount error locks might be uninitialized but
accessed by the resulting call to jffs2_kill_sb().

Signed-off-by: Artem B. Bityuckiy <dedekind@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/build.c |  9 +--------
 fs/jffs2/super.c | 11 ++++++++++-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 3dd5394921c9..97dc39796e2c 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: build.c,v 1.70 2005/02/28 08:21:05 dedekind Exp $
+ * $Id: build.c,v 1.71 2005/07/12 16:37:08 dedekind Exp $
  *
  */
 
@@ -336,13 +336,6 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
 		c->blocks[i].bad_count = 0;
 	}
 
-	init_MUTEX(&c->alloc_sem);
-	init_MUTEX(&c->erase_free_sem);
-	init_waitqueue_head(&c->erase_wait);
-	init_waitqueue_head(&c->inocache_wq);
-	spin_lock_init(&c->erase_completion_lock);
-	spin_lock_init(&c->inocache_lock);
-
 	INIT_LIST_HEAD(&c->clean_list);
 	INIT_LIST_HEAD(&c->very_dirty_list);
 	INIT_LIST_HEAD(&c->dirty_list);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 2cf14cf8b35a..aaf9475cfb6a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: super.c,v 1.106 2005/05/18 11:37:25 dedekind Exp $
+ * $Id: super.c,v 1.107 2005/07/12 16:37:08 dedekind Exp $
  *
  */
 
@@ -140,6 +140,15 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
 	D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n",
 		  mtd->index, mtd->name));
 
+	/* Initialize JFFS2 superblock locks, the further initialization will be
+	 * done later */
+	init_MUTEX(&c->alloc_sem);
+	init_MUTEX(&c->erase_free_sem);
+	init_waitqueue_head(&c->erase_wait);
+	init_waitqueue_head(&c->inocache_wq);
+	spin_lock_init(&c->erase_completion_lock);
+	spin_lock_init(&c->inocache_lock);
+
 	sb->s_op = &jffs2_super_operations;
 	sb->s_flags = flags | MS_NOATIME;
 

From 751382dd5cb2702368d281a50b55c2d6c4e8fbfc Mon Sep 17 00:00:00 2001
From: Todd Poynor <tpoynor@mvista.com>
Date: Tue, 12 Jul 2005 03:34:39 +0100
Subject: [PATCH 060/113] [JFFS2] Avoid compiler warnings when
 JFFS2_FS_WRITEBUFFER=n

Signed-off-by: Todd Poynor <tpoynor@mvista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/os-linux.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 7bf72e012c94..d900c8929b09 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: os-linux.h,v 1.57 2005/07/06 12:13:09 dwmw2 Exp $
+ * $Id: os-linux.h,v 1.58 2005/07/12 02:34:35 tpoynor Exp $
  *
  */
 
@@ -86,6 +86,8 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
 #define jffs2_dataflash(c) (0)
 #define jffs2_nor_ecc_flash_setup(c) (0)
 #define jffs2_nor_ecc_flash_cleanup(c) do {} while (0)
+#define jffs2_dataflash_setup(c) (0)
+#define jffs2_dataflash_cleanup(c) do {} while (0)
 
 #else /* NAND and/or ECC'd NOR support present */
 

From 08357f82d4decc48bbfd39ae30d5fe0754f7f576 Mon Sep 17 00:00:00 2001
From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
Date: Fri, 3 Jun 2005 05:36:00 -0700
Subject: [PATCH 061/113] [IA64] improve flush_icache_range()

Check with PAL to see what the i-cache line size is for
each level of the cache, and so use the correct stride
when flushing the cache.

Acked-by: David Mosberger
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/setup.c | 37 ++++++++++++++++++++++++++++++--
 arch/ia64/lib/flush.S    | 46 +++++++++++++++++++++++++++++-----------
 2 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 2693e1522d7c..7fc891aca446 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -20,6 +20,7 @@
  * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
  * 01/07/99 S.Eranian	added the support for command line argument
  * 06/24/99 W.Drummond	added boot_cpu_data.
+ * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -82,6 +83,13 @@ struct io_space io_space[MAX_IO_SPACES];
 EXPORT_SYMBOL(io_space);
 unsigned int num_io_spaces;
 
+/*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
+unsigned long ia64_i_cache_stride_shift = ~0;
+
 /*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
  * mask specifies a mask of address bits that must be 0 in order for two buffers to be
@@ -626,6 +634,12 @@ setup_per_cpu_areas (void)
 	/* start_kernel() requires this... */
 }
 
+/*
+ * Calculate the max. cache line size.
+ *
+ * In addition, the minimum of the i-cache stride sizes is calculated for
+ * "flush_icache_range()".
+ */
 static void
 get_max_cacheline_size (void)
 {
@@ -639,6 +653,8 @@ get_max_cacheline_size (void)
                 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
                        __FUNCTION__, status);
                 max = SMP_CACHE_BYTES;
+		/* Safest setup for "flush_icache_range()" */
+		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
 		goto out;
         }
 
@@ -647,14 +663,31 @@ get_max_cacheline_size (void)
 						    &cci);
 		if (status != 0) {
 			printk(KERN_ERR
-			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
 			       __FUNCTION__, l, status);
 			max = SMP_CACHE_BYTES;
+			/* The safest setup for "flush_icache_range()" */
+			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			cci.pcci_unified = 1;
 		}
 		line_size = 1 << cci.pcci_line_size;
 		if (line_size > max)
 			max = line_size;
-        }
+		if (!cci.pcci_unified) {
+			status = ia64_pal_cache_config_info(l,
+						    /* cache_type (instruction)= */ 1,
+						    &cci);
+			if (status != 0) {
+				printk(KERN_ERR
+				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
+					__FUNCTION__, l, status);
+				/* The safest setup for "flush_icache_range()" */
+				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			}
+		}
+		if (cci.pcci_stride < ia64_i_cache_stride_shift)
+			ia64_i_cache_stride_shift = cci.pcci_stride;
+	}
   out:
 	if (max > ia64_max_cacheline_size)
 		ia64_max_cacheline_size = max;
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index a1af9146cfdb..3e2cfa2c6d39 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -3,37 +3,59 @@
  *
  * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/28/05 Zoltan Menyhart	Dynamic stride size
  */
+
 #include <asm/asmmacro.h>
-#include <asm/page.h>
+
 
 	/*
 	 * flush_icache_range(start,end)
-	 *	Must flush range from start to end-1 but nothing else (need to
+	 *
+	 *	Make i-cache(s) coherent with d-caches.
+	 *
+	 *	Must deal with range from start to end-1 but nothing else (need to
 	 *	be careful not to touch addresses that may be unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
 	 */
 GLOBAL_ENTRY(flush_icache_range)
+
 	.prologue
-	alloc r2=ar.pfs,2,0,0,0
-	sub r8=in1,in0,1
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_i_cache_stride_shift
+ 	mov	r21=1
 	;;
-	shr.u r8=r8,5			// we flush 32 bytes per iteration
-	.save ar.lc, r3
-	mov r3=ar.lc			// save ar.lc
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=in1,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc.i" =
+					//	"start" rounded down to stride boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
 	;;
 
 	.body
-
-	mov ar.lc=r8
+	mov	ar.lc=r8
 	;;
-.Loop:	fc.i in0			// issuable on M2 only
-	add in0=32,in0
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop:	fc.i	r24			// issuable on M0 only
+	add	r24=r21,r24		// we flush "stride size" bytes per iteration
+	nop.i	0
 	br.cloop.sptk.few .Loop
 	;;
 	sync.i
 	;;
 	srlz.i
 	;;
-	mov ar.lc=r3			// restore ar.lc
+	mov	ar.lc=r3		// restore ar.lc
 	br.ret.sptk.many rp
 END(flush_icache_range)

From 7c2a6c62c013a4ea57243536fc7f3987e4ba04bc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Tue, 12 Jul 2005 16:03:00 -0700
Subject: [PATCH 062/113] [IA64] Altix pcibus_to_node implementation

The Altix subarch does not provide node information via ACPI. Instead hooks
are used to fixup pci structures. This patch determines the nodes for Altix
PCI busses.

Remote Bridges:
---------------
Altix supports remote I/O nodes without memory or processors but with bridges.
The TIOCA type of bridge is an AGP bridge and the PROM provides information
about the closest node. That information will be returned by pcibus_to_node.

The TIOCP remote bridge type is a PCI bridge but the PROM does not provide a
closest node id. pcibus_to_node will return -1 for devices on those bridges
meaning that device control structures may be allocated on any node.

Safeguard:
----------
Should the fixups result in invalid node information for a pci controller then
a warning will be printed and pcibus_to_node will return -1.


This patch also fixes the "FIXME" in sn_dma_alloc_coherent. This means that
dma_alloc_coherent will now use alloc_pages_node to allocate memory local to
the node that the PCI device is connected to.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/io_init.c              | 20 ++++++++++++++++++--
 arch/ia64/sn/pci/pci_dma.c                 | 16 +++++++++++++---
 arch/ia64/sn/pci/pcibr/pcibr_provider.c    | 10 +++++++++-
 arch/ia64/sn/pci/tioca_provider.c          |  4 +++-
 include/asm-ia64/sn/pcibr_provider.h       |  2 +-
 include/asm-ia64/sn/pcibus_provider_defs.h |  3 ++-
 6 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index a67f39e448cb..a6649baf629a 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -61,7 +61,7 @@ sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction)
 }
 
 static void *
-sn_default_pci_bus_fixup(struct pcibus_bussoft *soft)
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
 {
 	return NULL;
 }
@@ -362,7 +362,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 
 	provider_soft = NULL;
 	if (provider->bus_fixup)
-		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr);
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
 
 	if (provider_soft == NULL)
 		return;		/* fixup failed or not applicable */
@@ -380,6 +380,22 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
 	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
 	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
 
+	/*
+	 * If the node information we obtained during the fixup phase is invalid
+	 * then set controller->node to -1 (undetermined)
+	 */
+	if (controller->node >= num_online_nodes()) {
+		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%lu"
+				    "L_IO=%lx L_MEM=%lx BASE=%lx\n",
+			b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+			b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+		printk(KERN_WARNING "on node %d but only %d nodes online."
+			"Association set to undetermined.\n",
+			controller->node, num_online_nodes());
+		controller->node = -1;
+	}
 	return;
 
 error_return:
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index a2f7a88aefbb..0e4b9ad9ef02 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -79,6 +79,7 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
+	int node;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
@@ -86,10 +87,19 @@ void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 
 	/*
 	 * Allocate the memory.
-	 * FIXME: We should be doing alloc_pages_node for the node closest
-	 *        to the PCI device.
 	 */
-	if (!(cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size))))
+	node = pcibus_to_node(pdev->bus);
+	if (likely(node >=0)) {
+		struct page *p = alloc_pages_node(node, GFP_ATOMIC, get_order(size));
+
+		if (likely(p))
+			cpuaddr = page_address(p);
+		else
+			return NULL;
+	} else
+		cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size));
+
+	if (unlikely(!cpuaddr))
 		return NULL;
 
 	memset(cpuaddr, 0x0, size);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 9813da56d311..b95e928636a1 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -85,7 +85,7 @@ pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs)
 }
 
 void *
-pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
 	int nasid, cnode, j;
 	struct hubdev_info *hubdev_info;
@@ -158,6 +158,14 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft)
 	memset(soft->pbi_int_ate_resource.ate, 0,
  	       (soft->pbi_int_ate_size * sizeof(uint64_t)));
 
+	if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP)
+		/*
+		 * TIO PCI Bridge with no closest node information.
+		 * FIXME: Find another way to determine the closest node
+		 */
+		controller->node = -1;
+	else
+		controller->node = cnode;
 	return soft;
 }
 
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 51cc4e63092c..5d76a7581465 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -581,7 +581,7 @@ tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
  * the caller.
  */
 static void *
-tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
 	struct tioca_common *tioca_common;
 	struct tioca_kernel *tioca_kern;
@@ -646,6 +646,8 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
 		       __FUNCTION__, SGI_TIOCA_ERROR,
 		       (int)tioca_common->ca_common.bs_persist_busnum);
 
+	/* Setup locality information */
+	controller->node = tioca_kern->ca_closest_node;
 	return tioca_common;
 }
 
diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h
index f9b8d2164007..2b42d9ece26b 100644
--- a/include/asm-ia64/sn/pcibr_provider.h
+++ b/include/asm-ia64/sn/pcibr_provider.h
@@ -128,7 +128,7 @@ pcibr_lock(struct pcibus_info *pcibus_info)
 #define pcibr_unlock(pcibus_info, flag)  spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
 
 extern int  pcibr_init_provider(void);
-extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
+extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *);
 extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t);
 extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t);
 extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h
index 04e27d5b3820..976f5eff0539 100644
--- a/include/asm-ia64/sn/pcibus_provider_defs.h
+++ b/include/asm-ia64/sn/pcibus_provider_defs.h
@@ -37,6 +37,7 @@ struct pcibus_bussoft {
 	struct xwidget_info	*bs_xwidget_info;
 };
 
+struct pci_controller;
 /*
  * SN pci bus indirection
  */
@@ -45,7 +46,7 @@ struct sn_pcibus_provider {
 	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t);
 	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t);
 	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
-	void *		(*bus_fixup)(struct pcibus_bussoft *);
+	void *		(*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *);
 };
 
 extern struct sn_pcibus_provider *sn_pci_provider[];

From 6211502d7ee9e515e4458d0c0ebfbb70553dc7de Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 13 Jul 2005 09:07:53 -0500
Subject: [PATCH 063/113] JFS: Allow security.* xattrs to be set on symlinks

All of the different xattr namespaces have different rules.
user.* and ACL's are not allowed on symlinks, and since these were the
first xattrs implemented, I assumed there was no need to support xattrs
on symlinks.  This one-line patch should fix it.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index ee438d429d45..fdd8f3f5a72f 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -781,7 +781,7 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	if (IS_RDONLY(inode))
 		return -EROFS;
 
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode))
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		return -EPERM;
 
 	if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)

From 59192ed9e7aa81b06a1803661419f7261afca1ad Mon Sep 17 00:00:00 2001
From: Ian Dall <ian@beware.dropbear.id.au>
Date: Wed, 13 Jul 2005 09:15:18 -0500
Subject: [PATCH 064/113] JFS: Need to be root to create files with security
 context

It turns out this is due to some inverted logic in xattr.c

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/xattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fdd8f3f5a72f..554ec739e49b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -790,12 +790,12 @@ static int can_set_xattr(struct inode *inode, const char *name,
 		 */
 		return can_set_system_xattr(inode, name, value, value_len);
 
-	if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+	if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
 		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
 
 #ifdef CONFIG_JFS_SECURITY
 	if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)
-	    != 0)
+	    == 0)
 		return 0;	/* Leave it to the security module */
 #endif
 		

From 6a8b4d319c52f8a3fdca46b185d001fbf0939911 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@wohnheim.fh-wedel.de>
Date: Wed, 13 Jul 2005 16:45:43 +0100
Subject: [PATCH 065/113] [MTD] cfi_cmdset_0002: Plugged a mem leak.

Signed-off-by: Joern Engel <joern@wohnheim.fh-wedel.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/mtd/chips/cfi_cmdset_0020.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 8c24e18db3b4..c894f8801578 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -4,7 +4,7 @@
  *
  * (C) 2000 Red Hat. GPL'd
  *
- * $Id: cfi_cmdset_0020.c,v 1.17 2004/11/20 12:49:04 dwmw2 Exp $
+ * $Id: cfi_cmdset_0020.c,v 1.19 2005/07/13 15:52:45 dwmw2 Exp $
  * 
  * 10/10/2000	Nicolas Pitre <nico@cam.org>
  * 	- completely revamped method functions so they are aware and
@@ -16,6 +16,8 @@
  *	- modified Intel Command Set 0x0001 to support ST Advanced Architecture
  *	  (command set 0x0020)
  *	- added a writev function
+ * 07/13/2005	Joern Engel <joern@wh.fh-wedel.de>
+ * 	- Plugged memory leak in cfi_staa_writev().
  */
 
 #include <linux/version.h>
@@ -719,6 +721,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 write_error:
 	if (retlen)
 		*retlen = totlen;
+	kfree(buffer);
 	return ret;
 }
 

From c0ac515e2951dd2a999cfd21dea3fdc1055f1573 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 13 Jul 2005 11:55:42 -0400
Subject: [PATCH 066/113] [PATCH] Lindent: ignore .indent.pro

 When I recently submitted a Lindent patch, it turned out that my .indent.pro
 options were also applied to the tree. This patch directs indent(1) to ignore
 the .indent.pro directives and only use options specified on the command
 line.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 scripts/Lindent | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Lindent b/scripts/Lindent
index 34ed785116b6..7d8d8896e309 100755
--- a/scripts/Lindent
+++ b/scripts/Lindent
@@ -1,2 +1,2 @@
 #!/bin/sh
-indent -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"

From 153f805781d35c91ab2f54aa2b8930cc4cfc7e89 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Wed, 13 Jul 2005 09:38:05 -0400
Subject: [PATCH 067/113] [PATCH] fix voyager subarchitecture EXPORT_SYMBOL
 breakage caused by i386_ksym reduction

This patch:

	[PATCH] Remove i386_ksyms.c, almost

made files like smp.c do their own EXPORT_SYMBOLS.  This means that all
subarchitectures that override these symbols now have to do the exports
themselves.  This patch adds the exports for voyager (which is the most
affected since it has a separate smp harness).  However, someone should
audit all the other subarchitectures to see if any others got broken.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/mach-voyager/voyager_basic.c | 1 +
 arch/i386/mach-voyager/voyager_smp.c   | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 3e439ce5e1b2..8680080a6a89 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -36,6 +36,7 @@
  * Power off function, if any
  */
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 int voyager_level = 0;
 
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index 8c8527593da0..0e1f4208b07c 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -10,6 +10,7 @@
  * the voyager hal to provide the functionality
  */
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/delay.h>
@@ -40,6 +41,7 @@ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR
 /* per CPU data structure (for /proc/cpuinfo et al), visible externally
  * indexed physically */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
 
 /* physical ID of the CPU used to boot the system */
 unsigned char boot_cpu_id;
@@ -72,6 +74,7 @@ static volatile unsigned long smp_invalidate_needed;
 /* Bitmask of currently online CPUs - used by setup.c for
    /proc/cpuinfo, visible externally but still physical */
 cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
 
 /* Bitmask of CPUs present in the system - exported by i386_syms.c, used
  * by scheduler but indexed physically */
@@ -238,6 +241,7 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 /* This is for the new dynamic CPU boot code */
 cpumask_t cpu_callin_map = CPU_MASK_NONE;
 cpumask_t cpu_callout_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_callout_map);
 
 /* The per processor IRQ masks (these are usually kept in sync) */
 static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -978,6 +982,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
 	preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_page);
 
 /* enable the requested IRQs */
 static void
@@ -1109,6 +1114,7 @@ smp_call_function (void (*func) (void *info), void *info, int retry,
 
 	return 0;
 }
+EXPORT_SYMBOL(smp_call_function);
 
 /* Sorry about the name.  In an APIC based system, the APICs
  * themselves are programmed to send a timer interrupt.  This is used

From 0399cb08c54708db231d616f106f64d920e0b723 Mon Sep 17 00:00:00 2001
From: Robert Love <rml@novell.com>
Date: Wed, 13 Jul 2005 12:38:18 -0400
Subject: [PATCH 068/113] [PATCH] inotify: move sysctl

This moves the inotify sysctl knobs to "/proc/sys/fs/inotify" from
"/proc/sys/fs".  Also some related cleanup.

Signed-off-by: Robert Love <rml@novell.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify.c           | 49 ++++++++++++++++++++++++++++++++++++----
 include/linux/sysctl.h | 12 +++++-----
 kernel/sysctl.c        | 51 +++++++++---------------------------------
 3 files changed, 62 insertions(+), 50 deletions(-)

diff --git a/fs/inotify.c b/fs/inotify.c
index e423bfe0c86f..fb4803131423 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -45,8 +45,8 @@ static kmem_cache_t *event_cachep;
 
 static struct vfsmount *inotify_mnt;
 
-/* These are configurable via /proc/sys/inotify */
-int inotify_max_user_devices;
+/* these are configurable via /proc/sys/fs/inotify/ */
+int inotify_max_user_instances;
 int inotify_max_user_watches;
 int inotify_max_queued_events;
 
@@ -125,6 +125,47 @@ struct inotify_watch {
 	u32			mask;	/* event mask for this watch */
 };
 
+#ifdef CONFIG_SYSCTL
+
+#include <linux/sysctl.h>
+
+static int zero;
+
+ctl_table inotify_table[] = {
+	{
+		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
+		.procname	= "max_user_instances",
+		.data		= &inotify_max_user_instances,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
+		.procname	= "max_user_watches",
+		.data		= &inotify_max_user_watches,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero, 
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
+		.procname	= "max_queued_events",
+		.data		= &inotify_max_queued_events,
+		.maxlen		= sizeof(int),
+		.mode		= 0644, 
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec, 
+		.extra1		= &zero
+	},
+	{ .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+
 static inline void get_inotify_dev(struct inotify_device *dev)
 {
 	atomic_inc(&dev->count);
@@ -842,7 +883,7 @@ asmlinkage long sys_inotify_init(void)
 
 	user = get_uid(current->user);
 
-	if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_devices)) {
+	if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_instances)) {
 		ret = -EMFILE;
 		goto out_err;
 	}
@@ -979,7 +1020,7 @@ static int __init inotify_init(void)
 	inotify_mnt = kern_mount(&inotify_fs_type);
 
 	inotify_max_queued_events = 8192;
-	inotify_max_user_devices = 128;
+	inotify_max_user_instances = 8;
 	inotify_max_user_watches = 8192;
 
 	atomic_set(&inotify_cookie, 0);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ce19a2aa0b21..bfbbe94b297d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -61,8 +61,7 @@ enum
 	CTL_DEV=7,		/* Devices */
 	CTL_BUS=8,		/* Busses */
 	CTL_ABI=9,		/* Binary emulation */
-	CTL_CPU=10,		/* CPU stuff (speed scaling, etc) */
-	CTL_INOTIFY=11		/* Inotify */
+	CTL_CPU=10		/* CPU stuff (speed scaling, etc) */
 };
 
 /* CTL_BUS names: */
@@ -71,12 +70,12 @@ enum
 	CTL_BUS_ISA=1		/* ISA */
 };
 
-/* CTL_INOTIFY names: */
+/* /proc/sys/fs/inotify/ */
 enum
 {
-	INOTIFY_MAX_USER_DEVICES=1,	/* max number of inotify device instances per user */
-	INOTIFY_MAX_USER_WATCHES=2,	/* max number of inotify watches per user */
-	INOTIFY_MAX_QUEUED_EVENTS=3	/* Max number of queued events per inotify device instance */
+	INOTIFY_MAX_USER_INSTANCES=1,	/* max instances per user */
+	INOTIFY_MAX_USER_WATCHES=2,	/* max watches per user */
+	INOTIFY_MAX_QUEUED_EVENTS=3	/* max queued events per instance */
 };
 
 /* CTL_KERN names: */
@@ -685,6 +684,7 @@ enum
 	FS_XFS=17,	/* struct: control xfs parameters */
 	FS_AIO_NR=18,	/* current system-wide number of aio requests */
 	FS_AIO_MAX_NR=19,	/* system-wide maximum number of aio requests */
+	FS_INOTIFY=20,	/* inotify submenu */
 };
 
 /* /proc/sys/fs/quota/ */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b240e2cb86fc..e60b9c36f1f0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,12 +67,6 @@ extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
 
-#ifdef CONFIG_INOTIFY
-extern int inotify_max_user_devices;
-extern int inotify_max_user_watches;
-extern int inotify_max_queued_events;
-#endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -152,6 +146,9 @@ extern ctl_table random_table[];
 #ifdef CONFIG_UNIX98_PTYS
 extern ctl_table pty_table[];
 #endif
+#ifdef CONFIG_INOTIFY
+extern ctl_table inotify_table[];
+#endif
 
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 int sysctl_legacy_va_layout;
@@ -957,6 +954,14 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#ifdef CONFIG_INOTIFY
+	{
+		.ctl_name	= FS_INOTIFY,
+		.procname	= "inotify",
+		.mode		= 0555,
+		.child		= inotify_table,
+	},
+#endif	
 #endif
 	{
 		.ctl_name	= KERN_SETUID_DUMPABLE,
@@ -966,40 +971,6 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-#ifdef CONFIG_INOTIFY
-	{
-		.ctl_name	= INOTIFY_MAX_USER_DEVICES,
-		.procname	= "max_user_devices",
-		.data		= &inotify_max_user_devices,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero,
-	},
-
-	{
-		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
-		.procname	= "max_user_watches",
-		.data		= &inotify_max_user_watches,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero, 
-	},
-
-	{
-		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
-		.procname	= "max_queued_events",
-		.data		= &inotify_max_queued_events,
-		.maxlen		= sizeof(int),
-		.mode		= 0644, 
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec, 
-		.extra1		= &zero
-	},
-#endif
 	{ .ctl_name = 0 }
 };
 

From 5995f16b4a464c8a57de7c9d5ddf4758dbacad41 Mon Sep 17 00:00:00 2001
From: Robert Love <rml@novell.com>
Date: Wed, 13 Jul 2005 12:38:39 -0400
Subject: [PATCH 069/113] [PATCH] inotify: event ordering

This rearranges the event ordering for "open" to be consistent with the
ordering of the other events.

Signed-off-by: Robert Love <rml@novell.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fsnotify.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index eb581b6cfca9..d07a92c94776 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -125,8 +125,8 @@ static inline void fsnotify_open(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= IN_ISDIR;
 
-	inotify_inode_queue_event(inode, mask, 0, NULL);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+	inotify_inode_queue_event(inode, mask, 0, NULL);	
 }
 
 /*

From 9a556e89081b0c1c2f83cee915363b15a68a6f2d Mon Sep 17 00:00:00 2001
From: Robert Love <rml@novell.com>
Date: Wed, 13 Jul 2005 13:49:23 -0400
Subject: [PATCH 070/113] [PATCH] inotify: misc cleanup

Really simple, basic cleanup.

Signed-off-by: Robert Love <rml@novell.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/inotify.c b/fs/inotify.c
index fb4803131423..54757be888b6 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -29,8 +29,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/poll.h>
-#include <linux/device.h>
-#include <linux/miscdevice.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/writeback.h>
@@ -934,7 +932,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char *path, u32 mask)
 
 	dev = filp->private_data;
 
-	ret = find_inode ((const char __user*)path, &nd);
+	ret = find_inode((const char __user*) path, &nd);
 	if (ret)
 		goto fput_and_out;
 
@@ -991,8 +989,9 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
 	if (!filp)
 		return -EBADF;
 	dev = filp->private_data;
-	ret = inotify_ignore (dev, wd);
+	ret = inotify_ignore(dev, wd);
 	fput(filp);
+
 	return ret;
 }
 
@@ -1032,8 +1031,6 @@ static int __init inotify_init(void)
 					 sizeof(struct inotify_kernel_event),
 					 0, SLAB_PANIC, NULL, NULL);
 
-	printk(KERN_INFO "inotify syscall\n");
-
 	return 0;
 }
 

From 88bd5121d635136e01369141367f315665534b3c Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cam.ac.uk>
Date: Wed, 13 Jul 2005 01:10:44 -0700
Subject: [PATCH 071/113] [PATCH] Fix soft lockup due to NTFS: VFS part and
 explanation

Something has changed in the core kernel such that we now get concurrent
inode write outs, one e.g via pdflush and one via sys_sync or whatever.
This causes a nasty deadlock in ntfs.  The only clean solution
unfortunately requires a minor vfs api extension.

First the deadlock analysis:

Prerequisive knowledge: NTFS has a file $MFT (inode 0) loaded at mount
time.  The NTFS driver uses the page cache for storing the file contents as
usual.  More interestingly this file contains the table of on-disk inodes
as a sequence of MFT_RECORDs.  Thus NTFS driver accesses the on-disk inodes
by accessing the MFT_RECORDs in the page cache pages of the loaded inode
$MFT.

The situation: VFS inode X on a mounted ntfs volume is dirty.  For same
inode X, the ntfs_inode is dirty and thus corresponding on-disk inode,
which is as explained above in a dirty PAGE_CACHE_PAGE belonging to the
table of inodes ($MFT, inode 0).

What happens:

Process 1: sys_sync()/umount()/whatever...  calls __sync_single_inode() for
$MFT -> do_writepages() -> write_page for the dirty page containing the
on-disk inode X, the page is now locked -> ntfs_write_mst_block() which
clears PageUptodate() on the page to prevent anyone else getting hold of it
whilst it does the write out (this is necessary as the on-disk inode needs
"fixups" applied before the write to disk which are removed again after the
write and PageUptodate is then set again).  It then analyses the page
looking for dirty on-disk inodes and when it finds one it calls
ntfs_may_write_mft_record() to see if it is safe to write this on-disk
inode.  This then calls ilookup5() to check if the corresponding VFS inode
is in icache().  This in turn calls ifind() which waits on the inode lock
via wait_on_inode whilst holding the global inode_lock.

Process 2: pdflush results in a call to __sync_single_inode for the same
VFS inode X on the ntfs volume.  This locks the inode (I_LOCK) then calls
write-inode -> ntfs_write_inode -> map_mft_record() -> read_cache_page() of
the page (in page cache of table of inodes $MFT, inode 0) containing the
on-disk inode.  This page has PageUptodate() clear because of Process 1
(see above) so read_cache_page() blocks when tries to take the page lock
for the page so it can call ntfs_read_page().

Thus Process 1 is holding the page lock on the page containing the on-disk
inode X and it is waiting on the inode X to be unlocked in ifind() so it
can write the page out and then unlock the page.

And Process 2 is holding the inode lock on inode X and is waiting for the
page to be unlocked so it can call ntfs_readpage() or discover that
Process 1 set PageUptodate() again and use the page.

Thus we have a deadlock due to ifind() waiting on the inode lock.

The only sensible solution: NTFS does not care whether the VFS inode is
locked or not when it calls ilookup5() (it doesn't use the VFS inode at
all, it just uses it to find the corresponding ntfs_inode which is of
course attached to the VFS inode (both are one single struct); and it uses
the ntfs_inode which is subject to its own locking so I_LOCK is irrelevant)
hence we want a modified ilookup5_nowait() which is the same as ilookup5()
but it does not wait on the inode lock.

Without such functionality I would have to keep my own ntfs_inode cache in
the NTFS driver just so I can find ntfs_inodes independent of their VFS
inodes which would be slow, memory and cpu cycle wasting, and incredibly
stupid given the icache already exists in the VFS.

Below is a patch that does the ilookup5_nowait() implementation in
fs/inode.c and exports it.

ilookup5_nowait.diff:

Introduce ilookup5_nowait() which is basically the same as ilookup5() but
it does not wait on the inode's lock (i.e. it omits the wait_on_inode()
done in ifind()).

This is needed to avoid a nasty deadlock in NTFS.

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c         | 45 +++++++++++++++++++++++++++++++++++++++------
 include/linux/fs.h |  3 +++
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 96364fae0844..e57f1724db3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -757,6 +757,7 @@ EXPORT_SYMBOL(igrab);
  * @head:       the head of the list to search
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
+ * @wait:	if true wait for the inode to be unlocked, if false do not
  *
  * ifind() searches for the inode specified by @data in the inode
  * cache. This is a generalized version of ifind_fast() for file systems where
@@ -771,7 +772,7 @@ EXPORT_SYMBOL(igrab);
  */
 static inline struct inode *ifind(struct super_block *sb,
 		struct hlist_head *head, int (*test)(struct inode *, void *),
-		void *data)
+		void *data, const int wait)
 {
 	struct inode *inode;
 
@@ -780,7 +781,8 @@ static inline struct inode *ifind(struct super_block *sb,
 	if (inode) {
 		__iget(inode);
 		spin_unlock(&inode_lock);
-		wait_on_inode(inode);
+		if (likely(wait))
+			wait_on_inode(inode);
 		return inode;
 	}
 	spin_unlock(&inode_lock);
@@ -820,7 +822,7 @@ static inline struct inode *ifind_fast(struct super_block *sb,
 }
 
 /**
- * ilookup5 - search for an inode in the inode cache
+ * ilookup5_nowait - search for an inode in the inode cache
  * @sb:		super block of file system to search
  * @hashval:	hash value (usually inode number) to search for
  * @test:	callback used for comparisons between inodes
@@ -832,7 +834,38 @@ static inline struct inode *ifind_fast(struct super_block *sb,
  * identification of an inode.
  *
  * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
+ * reference count.  Note, the inode lock is not waited upon so you have to be
+ * very careful what you do with the returned inode.  You probably should be
+ * using ilookup5() instead.
+ *
+ * Otherwise NULL is returned.
+ *
+ * Note, @test is called with the inode_lock held, so can't sleep.
+ */
+struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
+		int (*test)(struct inode *, void *), void *data)
+{
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+
+	return ifind(sb, head, test, data, 0);
+}
+
+EXPORT_SYMBOL(ilookup5_nowait);
+
+/**
+ * ilookup5 - search for an inode in the inode cache
+ * @sb:		super block of file system to search
+ * @hashval:	hash value (usually inode number) to search for
+ * @test:	callback used for comparisons between inodes
+ * @data:	opaque data pointer to pass to @test
+ *
+ * ilookup5() uses ifind() to search for the inode specified by @hashval and
+ * @data in the inode cache. This is a generalized version of ilookup() for
+ * file systems where the inode number is not sufficient for unique
+ * identification of an inode.
+ *
+ * If the inode is in the cache, the inode lock is waited upon and the inode is
+ * returned with an incremented reference count.
  *
  * Otherwise NULL is returned.
  *
@@ -843,7 +876,7 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	return ifind(sb, head, test, data);
+	return ifind(sb, head, test, data, 1);
 }
 
 EXPORT_SYMBOL(ilookup5);
@@ -900,7 +933,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
-	inode = ifind(sb, head, test, data);
+	inode = ifind(sb, head, test, data, 1);
 	if (inode)
 		return inode;
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c9bf3746a9fb..0f53e0124941 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1441,6 +1441,9 @@ extern int inode_needs_sync(struct inode *inode);
 extern void generic_delete_inode(struct inode *inode);
 extern void generic_drop_inode(struct inode *inode);
 
+extern struct inode *ilookup5_nowait(struct super_block *sb,
+		unsigned long hashval, int (*test)(struct inode *, void *),
+		void *data);
 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data);
 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);

From ddca3b80cef36cc668f924ef5154a79acb19ebd7 Mon Sep 17 00:00:00 2001
From: Guillaume Autran <gautran@mrv.com>
Date: Wed, 13 Jul 2005 01:10:45 -0700
Subject: [PATCH 072/113] [PATCH] ppc32: fix destroy_context() race condition

Fix for a race condition when a task gets preempted by another task while
executing the destroy_context(...) in a FEW_CONTEXTS environment.
mm->context == NO_CONTEXT but the context_map may indicate all contexts are
in use.

The solution to this problem is to disable kernel preemption while
destroying a MMU context.

Signed-off-by: Guillaume Autran <gautran@mrv.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-ppc/mmu_context.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index ccabbce39d85..afe26ffc2e2d 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -149,6 +149,7 @@ static inline void get_mmu_context(struct mm_struct *mm)
  */
 static inline void destroy_context(struct mm_struct *mm)
 {
+	preempt_disable();
 	if (mm->context != NO_CONTEXT) {
 		clear_bit(mm->context, context_map);
 		mm->context = NO_CONTEXT;
@@ -156,6 +157,7 @@ static inline void destroy_context(struct mm_struct *mm)
 		atomic_inc(&nr_free_contexts);
 #endif
 	}
+	preempt_enable();
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,

From 068e1b94bbd268f375349f68531829c8b7c210bc Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 13 Jul 2005 01:10:46 -0700
Subject: [PATCH 073/113] [PATCH] s390: fadvise hint values.

Add special case for the POSIX_FADV_DONTNEED and POSIX_FADV_NOREUSE hint
values for s390-64.  The user space values in the s390-64 glibc headers for
these two defines have always been 6 and 7 instead of 4 and 5.  All 64 bit
applications therefore use the "wrong" values.  To get these applications
working without recompiling the kernel needs to accept the "wrong" values.
Since the values for s390-31 are 4 and 5 the compat wrapper for fadvise64
and fadvise64_64 need to rewrite the values for 31 bit system calls.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/kernel/compat_linux.c   | 38 +++++++++++++++++++++++++++++++
 arch/s390/kernel/compat_wrapper.S |  4 ++--
 include/linux/fadvise.h           | 10 ++++++++
 3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 614056222875..18610cea03a2 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -58,6 +58,7 @@
 #include <linux/compat.h>
 #include <linux/vfs.h>
 #include <linux/ptrace.h>
+#include <linux/fadvise.h>
 
 #include <asm/types.h>
 #include <asm/ipc.h>
@@ -1043,3 +1044,40 @@ sys32_timer_create(clockid_t which_clock, struct compat_sigevent *se32,
 
 	return ret;
 }
+
+/*
+ * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
+ * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
+ * because the 31 bit values differ from the 64 bit values.
+ */
+
+asmlinkage long
+sys32_fadvise64(int fd, loff_t offset, size_t len, int advise)
+{
+	if (advise == 4)
+		advise = POSIX_FADV_DONTNEED;
+	else if (advise == 5)
+		advise = POSIX_FADV_NOREUSE;
+	return sys_fadvise64(fd, offset, len, advise);
+}
+
+struct fadvise64_64_args {
+	int fd;
+	long long offset;
+	long long len;
+	int advice;
+};
+
+asmlinkage long
+sys32_fadvise64_64(struct fadvise64_64_args __user *args)
+{
+	struct fadvise64_64_args a;
+
+	if ( copy_from_user(&a, args, sizeof(a)) )
+		return -EFAULT;
+	if (a.advice == 4)
+		a.advice = POSIX_FADV_DONTNEED;
+	else if (a.advice == 5)
+		a.advice = POSIX_FADV_NOREUSE;
+	return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index bf529739c8ab..799a98eac92d 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1251,12 +1251,12 @@ sys32_fadvise64_wrapper:
 	or	%r3,%r4			# get low word of 64bit loff_t
 	llgfr	%r4,%r5			# size_t (unsigned long)
 	lgfr	%r5,%r6			# int
-	jg	sys_fadvise64
+	jg	sys32_fadvise64
 
 	.globl	sys32_fadvise64_64_wrapper
 sys32_fadvise64_64_wrapper:
 	llgtr	%r2,%r2			# struct fadvise64_64_args *
-	jg	s390_fadvise64_64
+	jg	sys32_fadvise64_64
 
 	.globl	sys32_clock_settime_wrapper
 sys32_clock_settime_wrapper:
diff --git a/include/linux/fadvise.h b/include/linux/fadvise.h
index 6fc656dfb93d..e8e747139b9a 100644
--- a/include/linux/fadvise.h
+++ b/include/linux/fadvise.h
@@ -5,7 +5,17 @@
 #define POSIX_FADV_RANDOM	1 /* Expect random page references.  */
 #define POSIX_FADV_SEQUENTIAL	2 /* Expect sequential page references.  */
 #define POSIX_FADV_WILLNEED	3 /* Will need these pages.  */
+
+/*
+ * The advise values for POSIX_FADV_DONTNEED and POSIX_ADV_NOREUSE
+ * for s390-64 differ from the values for the rest of the world.
+ */
+#if defined(__s390x__)
+#define POSIX_FADV_DONTNEED	6 /* Don't need these pages.  */
+#define POSIX_FADV_NOREUSE	7 /* Data will be accessed once.  */
+#else
 #define POSIX_FADV_DONTNEED	4 /* Don't need these pages.  */
 #define POSIX_FADV_NOREUSE	5 /* Data will be accessed once.  */
+#endif
 
 #endif	/* FADVISE_H_INCLUDED */

From 7ee91ec14bd4def036b4084da29869382078e44b Mon Sep 17 00:00:00 2001
From: Steve Dickson <SteveD@redhat.com>
Date: Wed, 13 Jul 2005 01:10:47 -0700
Subject: [PATCH 074/113] [PATCH] NFS: procfs/sysctl interfaces for lockd do
 not work on x86_64

Allow the setting of NLM timeouts and grace periods through the proc and
sysclt interfaces on x86_64 architectures

Signed-off-by: Steve Dickson <steved@redhat.com>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 6e242556b903..12a857c29e25 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -331,7 +331,7 @@ static ctl_table nlm_sysctls[] = {
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nlm_grace_period",
 		.data		= &nlm_grace_period,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 		.extra1		= (unsigned long *) &nlm_grace_period_min,
@@ -341,7 +341,7 @@ static ctl_table nlm_sysctls[] = {
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "nlm_timeout",
 		.data		= &nlm_timeout,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 		.extra1		= (unsigned long *) &nlm_timeout_min,

From f13487c66c75f5db004a0631047309d9e7c5aab7 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Wed, 13 Jul 2005 01:11:41 -0700
Subject: [PATCH 075/113] [PATCH] ppc32: make -j12 all fails in uImage target

make -j zImage may call if_changed twice at the same time, the result is a
corrupted vmlinux.gz

Write to a temporary file for the time being until someone with make skills
fix the serialization properly.

Signed-off-by: Olaf Hering <olh@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Tom Rini <trini@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc/boot/images/Makefile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/ppc/boot/images/Makefile b/arch/ppc/boot/images/Makefile
index c9ac5f5fa9e4..532e7ef1edb6 100644
--- a/arch/ppc/boot/images/Makefile
+++ b/arch/ppc/boot/images/Makefile
@@ -6,12 +6,17 @@ MKIMAGE		:= $(srctree)/scripts/mkuboot.sh
 
 extra-y		:= vmlinux.bin vmlinux.gz
 
+# two make processes may write to vmlinux.gz at the same time with make -j
+quiet_cmd_mygzip = GZIP    $@
+cmd_mygzip = gzip -f -9 < $< > $@.$$$$ && mv $@.$$$$ $@
+
+
 OBJCOPYFLAGS_vmlinux.bin := -O binary
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
 $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
-	$(call if_changed,gzip)
+	$(call if_changed,mygzip)
 
 quiet_cmd_uimage = UIMAGE  $@
       cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A ppc -O linux -T kernel \

From 96e2844999f99878fc5b03b81ccaa60580005b81 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 13 Jul 2005 01:11:42 -0700
Subject: [PATCH 076/113] [PATCH] ppc64: kill bitfields in ppc64 hash code

This patch removes the use of bitfield types from the ppc64 hash table
manipulation code.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc64/kernel/iSeries_htab.c      |  51 +++++-----
 arch/ppc64/kernel/iSeries_setup.c     |  18 ++--
 arch/ppc64/kernel/pSeries_lpar.c      |  47 ++++------
 arch/ppc64/mm/hash_low.S              |   8 +-
 arch/ppc64/mm/hash_native.c           | 129 ++++++++++++--------------
 arch/ppc64/mm/hash_utils.c            |  16 ++--
 arch/ppc64/mm/hugetlbpage.c           |  16 ++--
 arch/ppc64/mm/init.c                  |   7 +-
 include/asm-ppc64/iSeries/HvCallHpt.h |  11 +--
 include/asm-ppc64/machdep.h           |   6 +-
 include/asm-ppc64/mmu.h               |  83 ++++++-----------
 11 files changed, 164 insertions(+), 228 deletions(-)

diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c
index aa9e8fdd1a4f..b0250ae4a72a 100644
--- a/arch/ppc64/kernel/iSeries_htab.c
+++ b/arch/ppc64/kernel/iSeries_htab.c
@@ -38,11 +38,12 @@ static inline void iSeries_hunlock(unsigned long slot)
 }
 
 static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
-			 unsigned long prpn, int secondary,
-			 unsigned long hpteflags, int bolted, int large)
+				unsigned long prpn, unsigned long vflags,
+				unsigned long rflags)
 {
 	long slot;
-	HPTE lhpte;
+	hpte_t lhpte;
+	int secondary = 0;
 
 	/*
 	 * The hypervisor tries both primary and secondary.
@@ -50,13 +51,13 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 	 * it means we have already tried both primary and secondary,
 	 * so we return failure immediately.
 	 */
-	if (secondary)
+	if (vflags & HPTE_V_SECONDARY)
 		return -1;
 
 	iSeries_hlock(hpte_group);
 
 	slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-	BUG_ON(lhpte.dw0.dw0.v);
+	BUG_ON(lhpte.v & HPTE_V_VALID);
 
 	if (slot == -1)	{ /* No available entry found in either group */
 		iSeries_hunlock(hpte_group);
@@ -64,19 +65,13 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 	}
 
 	if (slot < 0) {		/* MSB set means secondary group */
+		vflags |= HPTE_V_VALID;
 		secondary = 1;
 		slot &= 0x7fffffffffffffff;
 	}
 
-	lhpte.dw1.dword1      = 0;
-	lhpte.dw1.dw1.rpn     = physRpn_to_absRpn(prpn);
-	lhpte.dw1.flags.flags = hpteflags;
-
-	lhpte.dw0.dword0      = 0;
-	lhpte.dw0.dw0.avpn    = va >> 23;
-	lhpte.dw0.dw0.h       = secondary;
-	lhpte.dw0.dw0.bolted  = bolted;
-	lhpte.dw0.dw0.v       = 1;
+	lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
+	lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -88,20 +83,17 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 
 static unsigned long iSeries_hpte_getword0(unsigned long slot)
 {
-	unsigned long dword0;
-	HPTE hpte;
+	hpte_t hpte;
 
 	HvCallHpt_get(&hpte, slot);
-	dword0 = hpte.dw0.dword0;
-
-	return dword0;
+	return hpte.v;
 }
 
 static long iSeries_hpte_remove(unsigned long hpte_group)
 {
 	unsigned long slot_offset;
 	int i;
-	HPTE lhpte;
+	unsigned long hpte_v;
 
 	/* Pick a random slot to start at */
 	slot_offset = mftb() & 0x7;
@@ -109,10 +101,9 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
 	iSeries_hlock(hpte_group);
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		lhpte.dw0.dword0 = 
-			iSeries_hpte_getword0(hpte_group + slot_offset);
+		hpte_v = iSeries_hpte_getword0(hpte_group + slot_offset);
 
-		if (!lhpte.dw0.dw0.bolted) {
+		if (! (hpte_v & HPTE_V_BOLTED)) {
 			HvCallHpt_invalidateSetSwBitsGet(hpte_group + 
 							 slot_offset, 0, 0);
 			iSeries_hunlock(hpte_group);
@@ -137,13 +128,13 @@ static long iSeries_hpte_remove(unsigned long hpte_group)
 static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				  unsigned long va, int large, int local)
 {
-	HPTE hpte;
+	hpte_t hpte;
 	unsigned long avpn = va >> 23;
 
 	iSeries_hlock(slot);
 
 	HvCallHpt_get(&hpte, slot);
-	if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
+	if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
 		/*
 		 * Hypervisor expects bits as NPPP, which is
 		 * different from how they are mapped in our PP.
@@ -167,7 +158,7 @@ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
  */
 static long iSeries_hpte_find(unsigned long vpn)
 {
-	HPTE hpte;
+	hpte_t hpte;
 	long slot;
 
 	/*
@@ -177,7 +168,7 @@ static long iSeries_hpte_find(unsigned long vpn)
 	 * 0x80000000xxxxxxxx : Entry found in secondary group, slot x
 	 */
 	slot = HvCallHpt_findValid(&hpte, vpn); 
-	if (hpte.dw0.dw0.v) {
+	if (hpte.v & HPTE_V_VALID) {
 		if (slot < 0) {
 			slot &= 0x7fffffffffffffff;
 			slot = -slot;
@@ -212,7 +203,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
 				    int large, int local)
 {
-	HPTE lhpte;
+	unsigned long hpte_v;
 	unsigned long avpn = va >> 23;
 	unsigned long flags;
 
@@ -220,9 +211,9 @@ static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	iSeries_hlock(slot);
 
-	lhpte.dw0.dword0 = iSeries_hpte_getword0(slot);
+	hpte_v = iSeries_hpte_getword0(slot);
 	
-	if ((lhpte.dw0.dw0.avpn == avpn) && lhpte.dw0.dw0.v)
+	if ((HPTE_V_AVPN_VAL(hpte_v) == avpn) && (hpte_v & HPTE_V_VALID))
 		HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
 
 	iSeries_hunlock(slot);
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c
index 077c82fc9f3a..a649edbb23b6 100644
--- a/arch/ppc64/kernel/iSeries_setup.c
+++ b/arch/ppc64/kernel/iSeries_setup.c
@@ -503,7 +503,7 @@ static void __init build_iSeries_Memory_Map(void)
 
 	/* Fill in the hashed page table hash mask */
 	num_ptegs = hptSizePages *
-		(PAGE_SIZE / (sizeof(HPTE) * HPTES_PER_GROUP));
+		(PAGE_SIZE / (sizeof(hpte_t) * HPTES_PER_GROUP));
 	htab_hash_mask = num_ptegs - 1;
 
 	/*
@@ -618,25 +618,23 @@ static void __init setup_iSeries_cache_sizes(void)
 static void iSeries_make_pte(unsigned long va, unsigned long pa,
 			     int mode)
 {
-	HPTE local_hpte, rhpte;
+	hpte_t local_hpte, rhpte;
 	unsigned long hash, vpn;
 	long slot;
 
 	vpn = va >> PAGE_SHIFT;
 	hash = hpt_hash(vpn, 0);
 
-	local_hpte.dw1.dword1 = pa | mode;
-	local_hpte.dw0.dword0 = 0;
-	local_hpte.dw0.dw0.avpn = va >> 23;
-	local_hpte.dw0.dw0.bolted = 1;		/* bolted */
-	local_hpte.dw0.dw0.v = 1;
+	local_hpte.r = pa | mode;
+	local_hpte.v = ((va >> 23) << HPTE_V_AVPN_SHIFT)
+		| HPTE_V_BOLTED | HPTE_V_VALID;
 
 	slot = HvCallHpt_findValid(&rhpte, vpn);
 	if (slot < 0) {
 		/* Must find space in primary group */
 		panic("hash_page: hpte already exists\n");
 	}
-	HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte );
+	HvCallHpt_addValidate(slot, 0, &local_hpte);
 }
 
 /*
@@ -646,7 +644,7 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
 {
 	unsigned long pa;
 	unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
-	HPTE hpte;
+	hpte_t hpte;
 
 	for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) {
 		unsigned long ea = (unsigned long)__va(pa);
@@ -659,7 +657,7 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
 		if (!in_kernel_text(ea))
 			mode_rw |= HW_NO_EXEC;
 
-		if (hpte.dw0.dw0.v) {
+		if (hpte.v & HPTE_V_VALID) {
 			/* HPTE exists, so just bolt it */
 			HvCallHpt_setSwBits(slot, 0x10, 0);
 			/* And make sure the pp bits are correct */
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c
index 6534812db437..74dd144dcce8 100644
--- a/arch/ppc64/kernel/pSeries_lpar.c
+++ b/arch/ppc64/kernel/pSeries_lpar.c
@@ -277,31 +277,20 @@ void vpa_init(int cpu)
 
 long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 			      unsigned long va, unsigned long prpn,
-			      int secondary, unsigned long hpteflags,
-			      int bolted, int large)
+			      unsigned long vflags, unsigned long rflags)
 {
 	unsigned long arpn = physRpn_to_absRpn(prpn);
 	unsigned long lpar_rc;
 	unsigned long flags;
 	unsigned long slot;
-	HPTE lhpte;
+	unsigned long hpte_v, hpte_r;
 	unsigned long dummy0, dummy1;
 
-	/* Fill in the local HPTE with absolute rpn, avpn and flags */
-	lhpte.dw1.dword1      = 0;
-	lhpte.dw1.dw1.rpn     = arpn;
-	lhpte.dw1.flags.flags = hpteflags;
+	hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
+	if (vflags & HPTE_V_LARGE)
+		hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
 
-	lhpte.dw0.dword0      = 0;
-	lhpte.dw0.dw0.avpn    = va >> 23;
-	lhpte.dw0.dw0.h       = secondary;
-	lhpte.dw0.dw0.bolted  = bolted;
-	lhpte.dw0.dw0.v       = 1;
-
-	if (large) {
-		lhpte.dw0.dw0.l = 1;
-		lhpte.dw0.dw0.avpn &= ~0x1UL;
-	}
+	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	/* Set CEC cookie to 0         */
@@ -312,11 +301,11 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	flags = 0;
 
 	/* XXX why is this here? - Anton */
-	if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
-		lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
+	if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
+		hpte_r &= ~_PAGE_COHERENT;
 
-	lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, lhpte.dw0.dword0,
-			      lhpte.dw1.dword1, &slot, &dummy0, &dummy1);
+	lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
+			      hpte_r, &slot, &dummy0, &dummy1);
 
 	if (unlikely(lpar_rc == H_PTEG_Full))
 		return -1;
@@ -332,7 +321,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Because of iSeries, we have to pass down the secondary
 	 * bucket bit here as well
 	 */
-	return (slot & 7) | (secondary << 3);
+	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
 }
 
 static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
@@ -427,22 +416,18 @@ static long pSeries_lpar_hpte_find(unsigned long vpn)
 	unsigned long hash;
 	unsigned long i, j;
 	long slot;
-	union {
-		unsigned long dword0;
-		Hpte_dword0 dw0;
-	} hpte_dw0;
-	Hpte_dword0 dw0;
+	unsigned long hpte_v;
 
 	hash = hpt_hash(vpn, 0);
 
 	for (j = 0; j < 2; j++) {
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		for (i = 0; i < HPTES_PER_GROUP; i++) {
-			hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot);
-			dw0 = hpte_dw0.dw0;
+			hpte_v = pSeries_lpar_hpte_getword0(slot);
 
-			if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
-			    (dw0.h == j)) {
+			if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+			    && (hpte_v & HPTE_V_VALID)
+			    && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
 				/* HPTE matches */
 				if (j)
 					slot = -slot;
diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S
index c23d46956dd9..fbff24827ae7 100644
--- a/arch/ppc64/mm/hash_low.S
+++ b/arch/ppc64/mm/hash_low.S
@@ -170,9 +170,7 @@ htab_insert_pte:
 	/* Call ppc_md.hpte_insert */
 	ld	r7,STK_PARM(r4)(r1)	/* Retreive new pp bits */
 	mr	r4,r29			/* Retreive va */
-	li	r6,0			/* primary slot */
-	li	r8,0			/* not bolted and not large */
-	li	r9,0
+	li	r6,0			/* no vflags */
 _GLOBAL(htab_call_hpte_insert1)
 	bl	.			/* Will be patched by htab_finish_init() */
 	cmpdi	0,r3,0
@@ -192,9 +190,7 @@ _GLOBAL(htab_call_hpte_insert1)
 	/* Call ppc_md.hpte_insert */
 	ld	r7,STK_PARM(r4)(r1)	/* Retreive new pp bits */
 	mr	r4,r29			/* Retreive va */
-	li	r6,1			/* secondary slot */
-	li	r8,0			/* not bolted and not large */
-	li	r9,0
+	li	r6,HPTE_V_SECONDARY@l	/* secondary slot */
 _GLOBAL(htab_call_hpte_insert2)
 	bl	.			/* Will be patched by htab_finish_init() */
 	cmpdi	0,r3,0
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
index 4fec05817d66..a6abd3a979bf 100644
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -27,9 +27,9 @@
 
 static DEFINE_SPINLOCK(native_tlbie_lock);
 
-static inline void native_lock_hpte(HPTE *hptep)
+static inline void native_lock_hpte(hpte_t *hptep)
 {
-	unsigned long *word = &hptep->dw0.dword0;
+	unsigned long *word = &hptep->v;
 
 	while (1) {
 		if (!test_and_set_bit(HPTE_LOCK_BIT, word))
@@ -39,32 +39,28 @@ static inline void native_lock_hpte(HPTE *hptep)
 	}
 }
 
-static inline void native_unlock_hpte(HPTE *hptep)
+static inline void native_unlock_hpte(hpte_t *hptep)
 {
-	unsigned long *word = &hptep->dw0.dword0;
+	unsigned long *word = &hptep->v;
 
 	asm volatile("lwsync":::"memory");
 	clear_bit(HPTE_LOCK_BIT, word);
 }
 
 long native_hpte_insert(unsigned long hpte_group, unsigned long va,
-			unsigned long prpn, int secondary,
-			unsigned long hpteflags, int bolted, int large)
+			unsigned long prpn, unsigned long vflags,
+			unsigned long rflags)
 {
 	unsigned long arpn = physRpn_to_absRpn(prpn);
-	HPTE *hptep = htab_address + hpte_group;
-	Hpte_dword0 dw0;
-	HPTE lhpte;
+	hpte_t *hptep = htab_address + hpte_group;
+	unsigned long hpte_v, hpte_r;
 	int i;
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		dw0 = hptep->dw0.dw0;
-
-		if (!dw0.v) {
+		if (! (hptep->v & HPTE_V_VALID)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			dw0 = hptep->dw0.dw0;
-			if (!dw0.v)
+			if (! (hptep->v & HPTE_V_VALID))
 				break;
 			native_unlock_hpte(hptep);
 		}
@@ -75,56 +71,45 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	if (i == HPTES_PER_GROUP)
 		return -1;
 
-	lhpte.dw1.dword1      = 0;
-	lhpte.dw1.dw1.rpn     = arpn;
-	lhpte.dw1.flags.flags = hpteflags;
-
-	lhpte.dw0.dword0      = 0;
-	lhpte.dw0.dw0.avpn    = va >> 23;
-	lhpte.dw0.dw0.h       = secondary;
-	lhpte.dw0.dw0.bolted  = bolted;
-	lhpte.dw0.dw0.v       = 1;
-
-	if (large) {
-		lhpte.dw0.dw0.l = 1;
-		lhpte.dw0.dw0.avpn &= ~0x1UL;
-	}
-
-	hptep->dw1.dword1 = lhpte.dw1.dword1;
+	hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
+	if (vflags & HPTE_V_LARGE)
+		va &= ~(1UL << HPTE_V_AVPN_SHIFT);
+	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
 
+	hptep->r = hpte_r;
 	/* Guarantee the second dword is visible before the valid bit */
 	__asm__ __volatile__ ("eieio" : : : "memory");
-
 	/*
 	 * Now set the first dword including the valid bit
 	 * NOTE: this also unlocks the hpte
 	 */
-	hptep->dw0.dword0 = lhpte.dw0.dword0;
+	hptep->v = hpte_v;
 
 	__asm__ __volatile__ ("ptesync" : : : "memory");
 
-	return i | (secondary << 3);
+	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
 }
 
 static long native_hpte_remove(unsigned long hpte_group)
 {
-	HPTE *hptep;
-	Hpte_dword0 dw0;
+	hpte_t *hptep;
 	int i;
 	int slot_offset;
+	unsigned long hpte_v;
 
 	/* pick a random entry to start at */
 	slot_offset = mftb() & 0x7;
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
 		hptep = htab_address + hpte_group + slot_offset;
-		dw0 = hptep->dw0.dw0;
+		hpte_v = hptep->v;
 
-		if (dw0.v && !dw0.bolted) {
+		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
 			/* retry with lock held */
 			native_lock_hpte(hptep);
-			dw0 = hptep->dw0.dw0;
-			if (dw0.v && !dw0.bolted)
+			hpte_v = hptep->v;
+			if ((hpte_v & HPTE_V_VALID)
+			    && !(hpte_v & HPTE_V_BOLTED))
 				break;
 			native_unlock_hpte(hptep);
 		}
@@ -137,15 +122,15 @@ static long native_hpte_remove(unsigned long hpte_group)
 		return -1;
 
 	/* Invalidate the hpte. NOTE: this also unlocks it */
-	hptep->dw0.dword0 = 0;
+	hptep->v = 0;
 
 	return i;
 }
 
-static inline void set_pp_bit(unsigned long pp, HPTE *addr)
+static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
 {
 	unsigned long old;
-	unsigned long *p = &addr->dw1.dword1;
+	unsigned long *p = &addr->r;
 
 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3\n\
@@ -163,11 +148,11 @@ static inline void set_pp_bit(unsigned long pp, HPTE *addr)
  */
 static long native_hpte_find(unsigned long vpn)
 {
-	HPTE *hptep;
+	hpte_t *hptep;
 	unsigned long hash;
 	unsigned long i, j;
 	long slot;
-	Hpte_dword0 dw0;
+	unsigned long hpte_v;
 
 	hash = hpt_hash(vpn, 0);
 
@@ -175,10 +160,11 @@ static long native_hpte_find(unsigned long vpn)
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		for (i = 0; i < HPTES_PER_GROUP; i++) {
 			hptep = htab_address + slot;
-			dw0 = hptep->dw0.dw0;
+			hpte_v = hptep->v;
 
-			if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
-			    (dw0.h == j)) {
+			if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+			    && (hpte_v & HPTE_V_VALID)
+			    && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
 				/* HPTE matches */
 				if (j)
 					slot = -slot;
@@ -195,20 +181,21 @@ static long native_hpte_find(unsigned long vpn)
 static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				 unsigned long va, int large, int local)
 {
-	HPTE *hptep = htab_address + slot;
-	Hpte_dword0 dw0;
+	hpte_t *hptep = htab_address + slot;
+	unsigned long hpte_v;
 	unsigned long avpn = va >> 23;
 	int ret = 0;
 
 	if (large)
-		avpn &= ~0x1UL;
+		avpn &= ~1;
 
 	native_lock_hpte(hptep);
 
-	dw0 = hptep->dw0.dw0;
+	hpte_v = hptep->v;
 
 	/* Even if we miss, we need to invalidate the TLB */
-	if ((dw0.avpn != avpn) || !dw0.v) {
+	if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+	    || !(hpte_v & HPTE_V_VALID)) {
 		native_unlock_hpte(hptep);
 		ret = -1;
 	} else {
@@ -244,7 +231,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 {
 	unsigned long vsid, va, vpn, flags = 0;
 	long slot;
-	HPTE *hptep;
+	hpte_t *hptep;
 	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
 
 	vsid = get_kernel_vsid(ea);
@@ -269,26 +256,27 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
 static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 				    int large, int local)
 {
-	HPTE *hptep = htab_address + slot;
-	Hpte_dword0 dw0;
+	hpte_t *hptep = htab_address + slot;
+	unsigned long hpte_v;
 	unsigned long avpn = va >> 23;
 	unsigned long flags;
 	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
 
 	if (large)
-		avpn &= ~0x1UL;
+		avpn &= ~1;
 
 	local_irq_save(flags);
 	native_lock_hpte(hptep);
 
-	dw0 = hptep->dw0.dw0;
+	hpte_v = hptep->v;
 
 	/* Even if we miss, we need to invalidate the TLB */
-	if ((dw0.avpn != avpn) || !dw0.v) {
+	if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+	    || !(hpte_v & HPTE_V_VALID)) {
 		native_unlock_hpte(hptep);
 	} else {
 		/* Invalidate the hpte. NOTE: this also unlocks it */
-		hptep->dw0.dword0 = 0;
+		hptep->v = 0;
 	}
 
 	/* Invalidate the tlb */
@@ -315,8 +303,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 static void native_hpte_clear(void)
 {
 	unsigned long slot, slots, flags;
-	HPTE *hptep = htab_address;
-	Hpte_dword0 dw0;
+	hpte_t *hptep = htab_address;
+	unsigned long hpte_v;
 	unsigned long pteg_count;
 
 	pteg_count = htab_hash_mask + 1;
@@ -336,11 +324,11 @@ static void native_hpte_clear(void)
 		 * running,  right?  and for crash dump, we probably
 		 * don't want to wait for a maybe bad cpu.
 		 */
-		dw0 = hptep->dw0.dw0;
+		hpte_v = hptep->v;
 
-		if (dw0.v) {
-			hptep->dw0.dword0 = 0;
-			tlbie(slot2va(dw0.avpn, dw0.l, dw0.h, slot), dw0.l);
+		if (hpte_v & HPTE_V_VALID) {
+			hptep->v = 0;
+			tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
 		}
 	}
 
@@ -353,8 +341,8 @@ static void native_flush_hash_range(unsigned long context,
 {
 	unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn;
 	int i, j;
-	HPTE *hptep;
-	Hpte_dword0 dw0;
+	hpte_t *hptep;
+	unsigned long hpte_v;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 
 	/* XXX fix for large ptes */
@@ -390,14 +378,15 @@ static void native_flush_hash_range(unsigned long context,
 
 		native_lock_hpte(hptep);
 
-		dw0 = hptep->dw0.dw0;
+		hpte_v = hptep->v;
 
 		/* Even if we miss, we need to invalidate the TLB */
-		if ((dw0.avpn != avpn) || !dw0.v) {
+		if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+		    || !(hpte_v & HPTE_V_VALID)) {
 			native_unlock_hpte(hptep);
 		} else {
 			/* Invalidate the hpte. NOTE: this also unlocks it */
-			hptep->dw0.dword0 = 0;
+			hptep->v = 0;
 		}
 
 		j++;
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 1647b1c6f28e..623b5d130c31 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -75,8 +75,8 @@
 extern unsigned long dart_tablebase;
 #endif /* CONFIG_U3_DART */
 
-HPTE		*htab_address;
-unsigned long	htab_hash_mask;
+hpte_t *htab_address;
+unsigned long htab_hash_mask;
 
 extern unsigned long _SDR1;
 
@@ -97,11 +97,15 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end,
 	unsigned long addr;
 	unsigned int step;
 	unsigned long tmp_mode;
+	unsigned long vflags;
 
-	if (large)
+	if (large) {
 		step = 16*MB;
-	else
+		vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
+	} else {
 		step = 4*KB;
+		vflags = HPTE_V_BOLTED;
+	}
 
 	for (addr = start; addr < end; addr += step) {
 		unsigned long vpn, hash, hpteg;
@@ -129,12 +133,12 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end,
 		if (systemcfg->platform & PLATFORM_LPAR)
 			ret = pSeries_lpar_hpte_insert(hpteg, va,
 				virt_to_abs(addr) >> PAGE_SHIFT,
-				0, tmp_mode, 1, large);
+				vflags, tmp_mode);
 		else
 #endif /* CONFIG_PPC_PSERIES */
 			ret = native_hpte_insert(hpteg, va,
 				virt_to_abs(addr) >> PAGE_SHIFT,
-				0, tmp_mode, 1, large);
+				vflags, tmp_mode);
 
 		if (ret == -1) {
 			ppc64_terminate_msg(0x20, "create_pte_mapping");
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index fdcfe97c75c1..f9524602818d 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -583,7 +583,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	pte_t *ptep;
 	unsigned long va, vpn;
 	pte_t old_pte, new_pte;
-	unsigned long hpteflags, prpn;
+	unsigned long rflags, prpn;
 	long slot;
 	int err = 1;
 
@@ -626,9 +626,9 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	old_pte = *ptep;
 	new_pte = old_pte;
 
-	hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+	rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
  	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
-	hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+	rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
 
 	/* Check if pte already has an hpte (case 2) */
 	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
@@ -641,7 +641,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1)
+		if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
 			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
 	}
 
@@ -661,10 +661,10 @@ repeat:
 
 		/* Add in WIMG bits */
 		/* XXX We should store these in the pte */
-		hpteflags |= _PAGE_COHERENT;
+		rflags |= _PAGE_COHERENT;
 
-		slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0,
-					  hpteflags, 0, 1);
+		slot = ppc_md.hpte_insert(hpte_group, va, prpn,
+					  HPTE_V_LARGE, rflags);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
@@ -672,7 +672,7 @@ repeat:
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL; 
 			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-						  1, hpteflags, 0, 1);
+						  HPTE_V_LARGE, rflags);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index b50b3a446dbe..e58a24d42879 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -180,9 +180,10 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
 		/* Panic if a pte grpup is full */
-		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
-				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
-				       1, 0) == -1) {
+		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
+				       HPTE_V_BOLTED,
+				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
+		    == -1) {
 			panic("map_io_page: could not insert mapping");
 		}
 	}
diff --git a/include/asm-ppc64/iSeries/HvCallHpt.h b/include/asm-ppc64/iSeries/HvCallHpt.h
index 66f38222ff75..43a1969230b8 100644
--- a/include/asm-ppc64/iSeries/HvCallHpt.h
+++ b/include/asm-ppc64/iSeries/HvCallHpt.h
@@ -77,27 +77,26 @@ static inline u64 HvCallHpt_invalidateSetSwBitsGet(u32 hpteIndex, u8 bitson,
 	return compressedStatus;
 }
 
-static inline u64 HvCallHpt_findValid(HPTE *hpte, u64 vpn)
+static inline u64 HvCallHpt_findValid(hpte_t *hpte, u64 vpn)
 {
 	return HvCall3Ret16(HvCallHptFindValid, hpte, vpn, 0, 0);
 }
 
-static inline u64 HvCallHpt_findNextValid(HPTE *hpte, u32 hpteIndex,
+static inline u64 HvCallHpt_findNextValid(hpte_t *hpte, u32 hpteIndex,
 		u8 bitson, u8 bitsoff)
 {
 	return HvCall3Ret16(HvCallHptFindNextValid, hpte, hpteIndex,
 			bitson, bitsoff);
 }
 
-static inline void HvCallHpt_get(HPTE *hpte, u32 hpteIndex)
+static inline void HvCallHpt_get(hpte_t *hpte, u32 hpteIndex)
 {
 	HvCall2Ret16(HvCallHptGet, hpte, hpteIndex, 0);
 }
 
-static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, HPTE *hpte)
+static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, hpte_t *hpte)
 {
-	HvCall4(HvCallHptAddValidate, hpteIndex, hBit, (*((u64 *)hpte)),
-			(*(((u64 *)hpte)+1)));
+	HvCall4(HvCallHptAddValidate, hpteIndex, hBit, hpte->v, hpte->r);
 }
 
 #endif /* _HVCALLHPT_H */
diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h
index 1e6ad4824132..f0c1d2d92672 100644
--- a/include/asm-ppc64/machdep.h
+++ b/include/asm-ppc64/machdep.h
@@ -53,10 +53,8 @@ struct machdep_calls {
 	long		(*hpte_insert)(unsigned long hpte_group,
 				       unsigned long va,
 				       unsigned long prpn,
-				       int secondary, 
-				       unsigned long hpteflags, 
-				       int bolted,
-				       int large);
+				       unsigned long vflags,
+				       unsigned long rflags);
 	long		(*hpte_remove)(unsigned long hpte_group);
 	void		(*flush_hash_range)(unsigned long context,
 					    unsigned long number,
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index f373de5e3dd9..3d07ddd11e3b 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -60,6 +60,22 @@
 
 #define HPTES_PER_GROUP 8
 
+#define HPTE_V_AVPN_SHIFT	7
+#define HPTE_V_AVPN		ASM_CONST(0xffffffffffffff80)
+#define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
+#define HPTE_V_BOLTED		ASM_CONST(0x0000000000000010)
+#define HPTE_V_LOCK		ASM_CONST(0x0000000000000008)
+#define HPTE_V_LARGE		ASM_CONST(0x0000000000000004)
+#define HPTE_V_SECONDARY	ASM_CONST(0x0000000000000002)
+#define HPTE_V_VALID		ASM_CONST(0x0000000000000001)
+
+#define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
+#define HPTE_R_TS		ASM_CONST(0x4000000000000000)
+#define HPTE_R_RPN_SHIFT	12
+#define HPTE_R_RPN		ASM_CONST(0x3ffffffffffff000)
+#define HPTE_R_FLAGS		ASM_CONST(0x00000000000003ff)
+#define HPTE_R_PP		ASM_CONST(0x0000000000000003)
+
 /* Values for PP (assumes Ks=0, Kp=1) */
 /* pp0 will always be 0 for linux     */
 #define PP_RWXX	0	/* Supervisor read/write, User none */
@@ -69,54 +85,13 @@
 
 #ifndef __ASSEMBLY__
 
-/* Hardware Page Table Entry */
 typedef struct {
-	unsigned long avpn:57; /* vsid | api == avpn  */
-	unsigned long :     2; /* Software use */
-	unsigned long bolted: 1; /* HPTE is "bolted" */
-	unsigned long lock: 1; /* lock on pSeries SMP */
-	unsigned long l:    1; /* Virtual page is large (L=1) or 4 KB (L=0) */
-	unsigned long h:    1; /* Hash function identifier */
-	unsigned long v:    1; /* Valid (v=1) or invalid (v=0) */
-} Hpte_dword0;
+	unsigned long v;
+	unsigned long r;
+} hpte_t;
 
-typedef struct {
-	unsigned long pp0:  1; /* Page protection bit 0 */
-	unsigned long ts:   1; /* Tag set bit */
-	unsigned long rpn: 50; /* Real page number */
-	unsigned long :     2; /* Reserved */
-	unsigned long ac:   1; /* Address compare */ 
-	unsigned long r:    1; /* Referenced */
-	unsigned long c:    1; /* Changed */
-	unsigned long w:    1; /* Write-thru cache mode */
-	unsigned long i:    1; /* Cache inhibited */
-	unsigned long m:    1; /* Memory coherence required */
-	unsigned long g:    1; /* Guarded */
-	unsigned long n:    1; /* No-execute */
-	unsigned long pp:   2; /* Page protection bits 1:2 */
-} Hpte_dword1;
-
-typedef struct {
-	char padding[6];	   	/* padding */
-	unsigned long :       6;	/* padding */ 
-	unsigned long flags: 10;	/* HPTE flags */
-} Hpte_dword1_flags;
-
-typedef struct {
-	union {
-		unsigned long dword0;
-		Hpte_dword0   dw0;
-	} dw0;
-
-	union {
-		unsigned long dword1;
-		Hpte_dword1 dw1;
-		Hpte_dword1_flags flags;
-	} dw1;
-} HPTE; 
-
-extern HPTE *		htab_address;
-extern unsigned long	htab_hash_mask;
+extern hpte_t *htab_address;
+extern unsigned long htab_hash_mask;
 
 static inline unsigned long hpt_hash(unsigned long vpn, int large)
 {
@@ -181,18 +156,18 @@ static inline void tlbiel(unsigned long va)
 	asm volatile("ptesync": : :"memory");
 }
 
-static inline unsigned long slot2va(unsigned long avpn, unsigned long large,
-		unsigned long secondary, unsigned long slot)
+static inline unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
 {
+	unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
 	unsigned long va;
 
 	va = avpn << 23;
 
-	if (!large) {
+	if (! (hpte_v & HPTE_V_LARGE)) {
 		unsigned long vpi, pteg;
 
 		pteg = slot / HPTES_PER_GROUP;
-		if (secondary)
+		if (hpte_v & HPTE_V_SECONDARY)
 			pteg = ~pteg;
 
 		vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
@@ -219,11 +194,11 @@ extern void hpte_init_iSeries(void);
 
 extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 				     unsigned long va, unsigned long prpn,
-				     int secondary, unsigned long hpteflags,
-				     int bolted, int large);
+				     unsigned long vflags,
+				     unsigned long rflags);
 extern long native_hpte_insert(unsigned long hpte_group, unsigned long va,
-			       unsigned long prpn, int secondary,
-			       unsigned long hpteflags, int bolted, int large);
+			       unsigned long prpn,
+			       unsigned long vflags, unsigned long rflags);
 
 #endif /* __ASSEMBLY__ */
 

From f264cc28245d4d635d586be8c2148140726756dd Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 13 Jul 2005 01:11:44 -0700
Subject: [PATCH 077/113] [PATCH] ppc64: add 970MP PVR

Add PVR value and tests for 970MP.  Also switch to a simpler (but slightly
longer) check at init time for simplicity.

Signed-off-by: Olof Johansson <olof@austin.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/ppc64/kernel/cpu_setup_power4.S | 36 ++++++++++++++++------------
 arch/ppc64/kernel/cputable.c         | 15 ++++++++++++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S
index 42fc08cf87a0..0482c063c26e 100644
--- a/arch/ppc64/kernel/cpu_setup_power4.S
+++ b/arch/ppc64/kernel/cpu_setup_power4.S
@@ -31,10 +31,13 @@ _GLOBAL(__970_cpu_preinit)
 	 */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
 	bnelr
+1:
 
 	/* Make sure HID4:rm_ci is off before MMU is turned off, that large
 	 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
@@ -133,12 +136,14 @@ _GLOBAL(__save_cpu_setup)
 	/* We only deal with 970 for now */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
+	bne	2f
 
-	/* Save HID0,1,4 and 5 */
+1:	/* Save HID0,1,4 and 5 */
 	mfspr	r3,SPRN_HID0
 	std	r3,CS_HID0(r5)
 	mfspr	r3,SPRN_HID1
@@ -148,7 +153,7 @@ _GLOBAL(__save_cpu_setup)
 	mfspr	r3,SPRN_HID5
 	std	r3,CS_HID5(r5)
 	
-1:
+2:
 	mtcr	r7
 	blr
 
@@ -165,12 +170,14 @@ _GLOBAL(__restore_cpu_setup)
 	/* We only deal with 970 for now */
 	mfspr	r0,SPRN_PVR
 	srwi	r0,r0,16
-	cmpwi	cr0,r0,0x39
-	cmpwi	cr1,r0,0x3c
-	cror	4*cr0+eq,4*cr0+eq,4*cr1+eq
-	bne	1f
+	cmpwi	r0,0x39
+	beq	1f
+	cmpwi	r0,0x3c
+	beq	1f
+	cmpwi	r0,0x44
+	bnelr
 
-	/* Before accessing memory, we make sure rm_ci is clear */
+1:	/* Before accessing memory, we make sure rm_ci is clear */
 	li	r0,0
 	mfspr	r3,SPRN_HID4
 	rldimi	r3,r0,40,23	/* clear bit 23 (rm_ci) */
@@ -223,6 +230,5 @@ _GLOBAL(__restore_cpu_setup)
 	mtspr	SPRN_HID5,r3
 	sync
 	isync
-1:
 	blr
 
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c
index 8d4c46f6f0b6..77cec42f9525 100644
--- a/arch/ppc64/kernel/cputable.c
+++ b/arch/ppc64/kernel/cputable.c
@@ -183,6 +183,21 @@ struct cpu_spec	cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_ppc970,
 		.firmware_features	= COMMON_PPC64_FW,
 	},
+	{	/* PPC970MP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00440000,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTR_SPLIT_ID_CACHE |
+			CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
+			CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
+			CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
+		.cpu_user_features	= COMMON_USER_PPC64 |
+			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.firmware_features	= COMMON_PPC64_FW,
+	},
 	{	/* Power5 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x003a0000,

From a61caa8523a76e497f6b2a05350c892bc5ee402c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 13 Jul 2005 12:56:42 +0200
Subject: [PATCH 078/113] [PATCH] Amiga joystick: Fix typo introduced by the
 open/close race fixes

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/input/joystick/amijoy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index 033456bb9fe0..e996183c5b06 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -105,7 +105,7 @@ out:
 
 static void amijoy_close(struct input_dev *dev)
 {
-	down(&amijoysem);
+	down(&amijoy_sem);
 	if (!--amijoy_used)
 		free_irq(IRQ_AMIGA_VERTB, amijoy_interrupt);
 	up(&amijoy_sem);

From 59a0a8aa6ab6e46926e8d12694593267c0869440 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Wed, 13 Jul 2005 05:45:00 -0700
Subject: [PATCH 079/113] [IA64] fix call of smp_processor_id() by XPC while

XPC calls smp_processor_id() twice from xpc_setup_infrastructure() with
preemption enabled, which gets flagged if 'DEBUG_PREEMPT=y'. This patch
replaces the two calls to smp_processor_id() by a single call to
raw_smp_processor_id() since any CPU within the partition will do.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_channel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 6d02dac8056f..94698bea7be0 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -72,7 +72,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
 enum xpc_retval
 xpc_setup_infrastructure(struct xpc_partition *part)
 {
-	int ret;
+	int ret, cpuid;
 	struct timer_list *timer;
 	partid_t partid = XPC_PARTID(part);
 
@@ -223,9 +223,9 @@ xpc_setup_infrastructure(struct xpc_partition *part)
 	xpc_vars_part[partid].openclose_args_pa =
 					__pa(part->local_openclose_args);
 	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id());
-	xpc_vars_part[partid].IPI_phys_cpuid =
-					cpu_physical_id(smp_processor_id());
+	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
+	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
 	xpc_vars_part[partid].nchannels = part->nchannels;
 	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
 

From f62c4a96f74d6c6dd56d1742697e94a5c2085e87 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <David.Mosberger@acm.org>
Date: Tue, 12 Jul 2005 21:51:00 -0700
Subject: [PATCH 080/113] [IA64] Make PCDP work again.

Mark's patch added "attribute((packed))" for pcdp_uart, without
accounting for the fact that the structure definition _relied_ on
implicit padding by 6 bytes.  Fix is to make the padding explicit.

Signed-off-by: David Mosberger-Tang <David.Mosberger@acm.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/firmware/pcdp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h
index e72cc47de33b..ce910d68bd19 100644
--- a/drivers/firmware/pcdp.h
+++ b/drivers/firmware/pcdp.h
@@ -52,6 +52,8 @@ struct pcdp_uart {
 	u32				clock_rate;
 	u8				pci_prog_intfc;
 	u8				flags;
+	u16				conout_index;
+	u32				reserved;
 } __attribute__((packed));
 
 #define PCDP_IF_PCI	1

From 1e279dd855d15b72364b4103f872d67d8592647e Mon Sep 17 00:00:00 2001
From: Miles Bader <miles@gnu.org>
Date: Wed, 13 Jul 2005 14:40:03 +0900
Subject: [PATCH 081/113] [PATCH] v850: Align ___start___param to match
 parameter alignment

Signed-off-by: Miles Bader <miles@gnu.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/v850/kernel/vmlinux.lds.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S
index bbd3429bcffc..c366a8b326ee 100644
--- a/arch/v850/kernel/vmlinux.lds.S
+++ b/arch/v850/kernel/vmlinux.lds.S
@@ -1,8 +1,8 @@
 /*
  * arch/v850/vmlinux.lds.S -- kernel linker script for v850 platforms
  *
- *  Copyright (C) 2002,03,04  NEC Electronics Corporation
- *  Copyright (C) 2002,03,04  Miles Bader <miles@gnu.org>
+ *  Copyright (C) 2002,03,04,05  NEC Electronics Corporation
+ *  Copyright (C) 2002,03,04,05  Miles Bader <miles@gnu.org>
  *
  * This file is subject to the terms and conditions of the GNU General
  * Public License.  See the file COPYING in the main directory of this
@@ -61,6 +61,7 @@
 			*(__kcrctab_gpl)				      \
 		___stop___kcrctab_gpl = .;				      \
 		/* Built-in module parameters */			      \
+		. = ALIGN (4) ;						      \
 		___start___param = .;					      \
 		*(__param)						      \
 		___stop___param = .;

From 3952db66efee4f22f3c6a0fd02a1e7071556a8d6 Mon Sep 17 00:00:00 2001
From: Michael Krufky <mkrufky@m1k.net>
Date: Thu, 14 Jul 2005 00:33:33 -0700
Subject: [PATCH 082/113] [PATCH] dvb: LGDT3302 QAM lock bug fix

Fix QAM lock bug.  Previously, it was necessary to first scan in VSB before
attempting to get a QAM lock.

Signed-off-by: Mac Michaels <wmichaels1@earthlink.net>
Signed-off-by: Michael Krufky <mkrufky@m1k.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/dvb/frontends/lgdt3302.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/media/dvb/frontends/lgdt3302.c b/drivers/media/dvb/frontends/lgdt3302.c
index 2eea03d218cd..c85a2a99df42 100644
--- a/drivers/media/dvb/frontends/lgdt3302.c
+++ b/drivers/media/dvb/frontends/lgdt3302.c
@@ -217,13 +217,11 @@ static int lgdt3302_set_parameters(struct dvb_frontend* fe,
 	static u8 demux_ctrl_cfg[] = { DEMUX_CONTROL, 0xfb };
 	static u8 agc_rf_cfg[]     = { AGC_RF_BANDWIDTH0, 0x40, 0x93, 0x00 };
 	static u8 agc_ctrl_cfg[]   = { AGC_FUNC_CTRL2, 0xc6, 0x40 };
-	static u8 agc_delay_cfg[]  = { AGC_DELAY0, 0x00, 0x00, 0x00 };
+	static u8 agc_delay_cfg[]  = { AGC_DELAY0, 0x07, 0x00, 0xfe };
 	static u8 agc_loop_cfg[]   = { AGC_LOOP_BANDWIDTH0, 0x08, 0x9a };
 
 	/* Change only if we are actually changing the modulation */
 	if (state->current_modulation != param->u.vsb.modulation) {
-		int value;
-
 		switch(param->u.vsb.modulation) {
 		case VSB_8:
 			dprintk("%s: VSB_8 MODE\n", __FUNCTION__);
@@ -276,16 +274,8 @@ static int lgdt3302_set_parameters(struct dvb_frontend* fe,
 		   recovery center frequency register */
 		i2c_writebytes(state, state->config->demod_address,
 				       vsb_freq_cfg, sizeof(vsb_freq_cfg));
-		/* Set the value of 'INLVTHD' register 0x2a/0x2c
-		   to value from 'IFACC' register 0x39/0x3b -1 */
-		i2c_selectreadbytes(state, AGC_RFIF_ACC0,
-				    &agc_delay_cfg[1], 3);
-		value = ((agc_delay_cfg[1] & 0x0f) << 8) | agc_delay_cfg[3];
-		value = value -1;
-		dprintk("%s IFACC -1 = 0x%03x\n", __FUNCTION__, value);
-		agc_delay_cfg[1] = (value >> 8) & 0x0f;
-		agc_delay_cfg[2] = 0x00;
-		agc_delay_cfg[3] = value & 0xff;
+
+		/* Set the value of 'INLVTHD' register 0x2a/0x2c to 0x7fe */
 		i2c_writebytes(state, state->config->demod_address,
 			       agc_delay_cfg, sizeof(agc_delay_cfg));
 

From bcb01b8a67476e6f748086e626df8424cc27036d Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:34 -0700
Subject: [PATCH 083/113] [PATCH] uml: fix lvalue for gcc4

      Russell King <rmk+lkml@arm.linux.org.uk>

This construct is refused by GCC 4, so here's the (corrected) fix.  Thanks to
Russell for noticing a stupid mistake I did when first sending this.

As he noted, the code is largely suboptimal however it currently works, and
will be fixed shortly.  Just read the access_ok check on fp which is NULL, or
the pointer arithmetic below which should be done with a cast to void*:

 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8;

The code shows clearly that has been taken from
arch/x86_64/kernel/signal.c:setup_rt_frame(), maybe in a bit of a hurry.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/sys-x86_64/signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index 73a7926f7370..8fdaed06c10d 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -168,7 +168,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8;
-	((unsigned char *) frame) -= 128;
+        frame = (struct rt_sigframe *) ((unsigned long) frame - 128);
 
 	if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
 		goto out;

From 1feb8d2d7396748fb3bd6795d09bbfbdd4c03dff Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:36 -0700
Subject: [PATCH 084/113] [PATCH] uml: workaround host bug in "TT mode vs. NPTL
 link fix"

A big bug has been diagnosed on hosts running the SKAS patch and built with
CONFIG_REGPARM, due to some missing prevent_tail_call().

On these hosts, this workaround is needed to avoid triggering that bug,
because "to" is kept by GCC only in EBX, which is corrupted at the return of
mmap2().

Since to trigger this bug int 0x80 must be used when doing the call, it rarely
manifests itself, so I'd prefer to get this merged to workaround that host
bug, since it should cause no functional change.  Still, you might prefer to
drop it, I'll leave this to you.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/sys-i386/unmap.c   | 2 +-
 arch/um/sys-x86_64/unmap.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/um/sys-i386/unmap.c b/arch/um/sys-i386/unmap.c
index 136875263d27..1b0ad0e4adcd 100644
--- a/arch/um/sys-i386/unmap.c
+++ b/arch/um/sys-i386/unmap.c
@@ -15,7 +15,7 @@ int switcheroo(int fd, int prot, void *from, void *to, int size)
 	if(munmap(to, size) < 0){
 		return(-1);
 	}
-	if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){
+	if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
 		return(-1);
 	}
 	if(munmap(from, size) < 0){
diff --git a/arch/um/sys-x86_64/unmap.c b/arch/um/sys-x86_64/unmap.c
index bc7094cce47e..f4a4bffd8a18 100644
--- a/arch/um/sys-x86_64/unmap.c
+++ b/arch/um/sys-x86_64/unmap.c
@@ -15,7 +15,7 @@ int switcheroo(int fd, int prot, void *from, void *to, int size)
 	if(munmap(to, size) < 0){
 		return(-1);
 	}
-	if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){
+	if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
 		return(-1);
 	}
 	if(munmap(from, size) < 0){

From 2e5e55923e315e8198f46f24f7ca37e1fd9aa102 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:37 -0700
Subject: [PATCH 085/113] [PATCH] uml: consolidate modify_ldt

*) Reorganize the two cases of sys_modify_ldt to share all the reasonably
   common code.

*) Avoid memory allocation when unneeded (i.e.  when we are writing and the
   passed buffer size is known), thus not returning ENOMEM (which isn't
   allowed for this syscall, even if there is no strict "specification").

*) Add copy_{from,to}_user to modify_ldt for TT mode.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/sys-i386/ldt.c | 114 ++++++++++++++++++++++-------------------
 include/asm-um/ldt.h   |   5 ++
 2 files changed, 67 insertions(+), 52 deletions(-)
 create mode 100644 include/asm-um/ldt.h

diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index dc755b0b9db8..bd3c34aa52e5 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -4,96 +4,106 @@
  */
 
 #include "linux/config.h"
+#include "linux/sched.h"
 #include "linux/slab.h"
+#include "linux/types.h"
 #include "asm/uaccess.h"
 #include "asm/ptrace.h"
+#include "asm/smp.h"
+#include "asm/ldt.h"
 #include "choose-mode.h"
 #include "kern.h"
+#include "mode_kern.h"
 
 #ifdef CONFIG_MODE_TT
+
 extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
 
-/* XXX this needs copy_to_user and copy_from_user */
-
-int sys_modify_ldt_tt(int func, void __user *ptr, unsigned long bytecount)
+static int do_modify_ldt_tt(int func, void *ptr, unsigned long bytecount)
 {
-	if (!access_ok(VERIFY_READ, ptr, bytecount))
-		return -EFAULT;
-
 	return modify_ldt(func, ptr, bytecount);
 }
+
 #endif
 
 #ifdef CONFIG_MODE_SKAS
-extern int userspace_pid[];
 
+#include "skas.h"
 #include "skas_ptrace.h"
 
-int sys_modify_ldt_skas(int func, void __user *ptr, unsigned long bytecount)
+static int do_modify_ldt_skas(int func, void *ptr, unsigned long bytecount)
 {
 	struct ptrace_ldt ldt;
-	void *buf;
-	int res, n;
+	u32 cpu;
+	int res;
 
-	buf = kmalloc(bytecount, GFP_KERNEL);
-	if(buf == NULL)
-		return(-ENOMEM);
+	ldt = ((struct ptrace_ldt) { .func	= func,
+				     .ptr	= ptr,
+				     .bytecount = bytecount });
 
-	res = 0;
+	cpu = get_cpu();
+	res = ptrace(PTRACE_LDT, userspace_pid[cpu], 0, (unsigned long) &ldt);
+	put_cpu();
+
+	return res;
+}
+#endif
+
+int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+	struct user_desc info;
+	int res = 0;
+	void *buf = NULL;
+	void *p = NULL; /* What we pass to host. */
 
 	switch(func){
 	case 1:
-	case 0x11:
-		res = copy_from_user(buf, ptr, bytecount);
-		break;
-	}
+	case 0x11: /* write_ldt */
+		/* Do this check now to avoid overflows. */
+		if (bytecount != sizeof(struct user_desc)) {
+			res = -EINVAL;
+			goto out;
+		}
 
-	if(res != 0){
-		res = -EFAULT;
+		if(copy_from_user(&info, ptr, sizeof(info))) {
+			res = -EFAULT;
+			goto out;
+		}
+
+		p = &info;
+		break;
+	case 0:
+	case 2: /* read_ldt */
+
+		/* The use of info avoids kmalloc on the write case, not on the
+		 * read one. */
+		buf = kmalloc(bytecount, GFP_KERNEL);
+		if (!buf) {
+			res = -ENOMEM;
+			goto out;
+		}
+		p = buf;
+	default:
+		res = -ENOSYS;
 		goto out;
 	}
 
-	ldt = ((struct ptrace_ldt) { .func	= func,
-				     .ptr	= buf,
-				     .bytecount = bytecount });
-#warning Need to look up userspace_pid by cpu
-	res = ptrace(PTRACE_LDT, userspace_pid[0], 0, (unsigned long) &ldt);
+	res = CHOOSE_MODE_PROC(do_modify_ldt_tt, do_modify_ldt_skas, func,
+				p, bytecount);
 	if(res < 0)
 		goto out;
 
 	switch(func){
 	case 0:
 	case 2:
-		n = res;
-		res = copy_to_user(ptr, buf, n);
-		if(res != 0)
+		/* Modify_ldt was for reading and returned the number of read
+		 * bytes.*/
+		if(copy_to_user(ptr, p, res))
 			res = -EFAULT;
-		else 
-			res = n;
 		break;
 	}
 
- out:
+out:
 	kfree(buf);
-	return(res);
+	return res;
 }
-#endif
-
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
-	return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func, 
-				ptr, bytecount));
-}
-
-
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/ldt.h b/include/asm-um/ldt.h
new file mode 100644
index 000000000000..e908439d338a
--- /dev/null
+++ b/include/asm-um/ldt.h
@@ -0,0 +1,5 @@
+#ifndef __UM_LDT_H
+#define __UM_LDT_H
+
+#include "asm/arch/ldt.h"
+#endif

From 1c30385ae479ec4774bdc1048726aeb15cde0d21 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:38 -0700
Subject: [PATCH 086/113] [PATCH] uml: gcc 2.95 fix and Makefile cleanup

1) Cleanup an ugly hyper-nested code in Makefile (now only the arith.
   expression is passed through the host bash).

2) Fix a problem with GCC 2.95: according to a report from Raphael Bossek,
   .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) } is expanded
   into: .remap_data : { arch/um/sys-i386 /unmap_fin.o (.data .bss) }

(because I didn't use ## to join the two tokens), thus stopping linking.  Pass
the whole path from the Makefile as a simple and nice fix.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Raphael Bossek <raphael.bossek@gmx.de>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/Makefile         | 9 +++++----
 arch/um/kernel/uml.lds.S | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 4a375bbac109..dfcc9eaafaa7 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -116,13 +116,14 @@ CONFIG_KERNEL_STACK_ORDER ?= 2
 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
 
 ifndef START
-  START = $$(($(TOP_ADDR) - $(SIZE)))
+  START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
 endif
 
-CPPFLAGS_vmlinux.lds = $(shell echo -U$(SUBARCH) \
+CPPFLAGS_vmlinux.lds = -U$(SUBARCH) \
 	-DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
-	-DELF_FORMAT=\"$(ELF_FORMAT)\" $(CPP_MODE-y) \
-	-DKERNEL_STACK_SIZE=$(STACK_SIZE) -DSUBARCH=$(SUBARCH))
+	-DELF_FORMAT="$(ELF_FORMAT)" $(CPP_MODE-y) \
+	-DKERNEL_STACK_SIZE=$(STACK_SIZE) \
+	-DUNMAP_PATH=arch/um/sys-$(SUBARCH)/unmap_fin.o
 
 #The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 163476a8cb1b..b03326d391c9 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -16,8 +16,8 @@ SECTIONS
   __binary_start = .;
 
 #ifdef MODE_TT
-  .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) }
-  .remap : { arch/um/sys-SUBARCH/unmap_fin.o (.text) }
+  .remap_data : { UNMAP_PATH (.data .bss) }
+  .remap : { UNMAP_PATH (.text) }
 
   . = ALIGN(4096);		/* Init code and data */
 #endif

From 8e0a21812427313f8ee8521688e81bfcc6f207a2 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:41 -0700
Subject: [PATCH 087/113] [PATCH] uml: fix hppfs error path

Fix the error message to refer to the error code, i.e.  err, not count, plus
add some cosmetical fixes.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hppfs/hppfs_kern.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 6f553e17c375..ff150fedb981 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -233,7 +233,7 @@ static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
 		set_fs(USER_DS);
 
 	if(ppos) *ppos = file->f_pos;
-	return(n);
+	return n;
 }
 
 static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
@@ -254,7 +254,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
 		err = os_read_file(fd, new_buf, cur);
 		if(err < 0){
 			printk("hppfs_read : read failed, errno = %d\n",
-			       count);
+			       err);
 			n = err;
 			goto out_free;
 		}
@@ -271,7 +271,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
  out_free:
 	kfree(new_buf);
  out:
-	return(n);
+	return n;
 }
 
 static ssize_t hppfs_read(struct file *file, char *buf, size_t count,

From ecc354a90af90a80dcaccc750f5f3e5e7c83112d Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:41 -0700
Subject: [PATCH 088/113] [PATCH] uml: reintroduce pcap support

The pcap support was not working because of some linking problems (expressing
the construct in Kbuild was a bit difficult) and because there was no user
request.  Now that this has come back, here's the support.

This has been tested and works on both 32 and 64-bit hosts, even when
"cross-"building 32-bit binaries.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/Kconfig_net      |  2 +-
 arch/um/Makefile         | 16 ++++++++++------
 arch/um/drivers/Makefile | 17 ++++++++++++++---
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/arch/um/Kconfig_net b/arch/um/Kconfig_net
index 1c2f9a70d91d..fa2ab2dd78b7 100644
--- a/arch/um/Kconfig_net
+++ b/arch/um/Kconfig_net
@@ -135,7 +135,7 @@ config UML_NET_MCAST
 
 config UML_NET_PCAP
 	bool "pcap transport"
-	depends on UML_NET && BROKEN
+	depends on UML_NET && EXPERIMENTAL
 	help
 	The pcap transport makes a pcap packet stream on the host look
 	like an ethernet device inside UML.  This is useful for making 
diff --git a/arch/um/Makefile b/arch/um/Makefile
index dfcc9eaafaa7..6a7a9f43cdef 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -56,17 +56,21 @@ include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
 core-y += $(SUBARCH_CORE)
 libs-y += $(SUBARCH_LIBS)
 
+# -Dvmap=kernel_vmap affects everything, and prevents anything from
+# referencing the libpcap.o symbol so named.
+
+CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
+	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap
+
+USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
+USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
+	$(MODE_INCLUDE) $(ARCH_USER_CFLAGS)
+
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
 # in CFLAGS.  Otherwise, it would cause ld to complain about the two different
 # errnos.
 
-CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
-	$(ARCH_INCLUDE) $(MODE_INCLUDE)
-
-USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
-USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
-	$(MODE_INCLUDE) $(ARCH_USER_CFLAGS)
 CFLAGS += -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask
 CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
 
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index b2de9916c32c..d6c31a95b887 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -10,7 +10,6 @@ slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
 mcast-objs := mcast_kern.o mcast_user.o
-#pcap-objs := pcap_kern.o pcap_user.o $(PCAP)
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
@@ -18,6 +17,17 @@ ubd-objs := ubd_kern.o ubd_user.o
 port-objs := port_kern.o port_user.o
 harddog-objs := harddog_kern.o harddog_user.o
 
+LDFLAGS_pcap.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libpcap.a)
+
+$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
+	$(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o)
+#XXX: The call below does not work because the flags are added before the
+# object name, so nothing from the library gets linked.
+#$(call if_changed,ld)
+
+# When the above is fixed, don't forget to add this too!
+#targets := $(obj)/pcap.o
+
 obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
 obj-$(CONFIG_SSL) += ssl.o
 obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
@@ -26,7 +36,7 @@ obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
 obj-$(CONFIG_UML_NET_MCAST) += mcast.o 
-#obj-$(CONFIG_UML_NET_PCAP) += pcap.o $(PCAP)
+obj-$(CONFIG_UML_NET_PCAP) += pcap.o
 obj-$(CONFIG_UML_NET) += net.o 
 obj-$(CONFIG_MCONSOLE) += mconsole.o
 obj-$(CONFIG_MMAPPER) += mmapper_kern.o 
@@ -41,6 +51,7 @@ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
 obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o
+# pcap_user.o must be added explicitly.
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o
 
 include arch/um/scripts/Makefile.rules

From 20d0021394c1b070bf04b22c5bc8fdb437edd4c5 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:43 -0700
Subject: [PATCH 089/113] [PATCH] uml: allow building as 32-bit binary on 64bit
 host

This patch makes the command:

make ARCH=um SUBARCH=i386

work on x86_64 hosts (with support for building 32-bit binaries).  This is
especially needed since 64-bit UMLs don't support 32-bit emulation for guest
binaries, currently.  This has been tested in all possible cases and works.

Only exception is that I've built but not tested a 64-bit binary, because I
hadn't a 64-bit filesystem available.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/Makefile               | 11 ++++-------
 arch/um/Makefile-i386          | 31 +++++++++++++++++++++----------
 arch/um/Makefile-x86_64        |  6 ++++--
 arch/um/scripts/Makefile.unmap |  4 ++--
 4 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 6a7a9f43cdef..eb4ac403bd93 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -51,11 +51,6 @@ MRPROPER_DIRS	+= $(ARCH_DIR)/include2
 endif
 SYS_DIR		:= $(ARCH_DIR)/include/sysdep-$(SUBARCH)
 
-include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
-
-core-y += $(SUBARCH_CORE)
-libs-y += $(SUBARCH_LIBS)
-
 # -Dvmap=kernel_vmap affects everything, and prevents anything from
 # referencing the libpcap.o symbol so named.
 
@@ -64,7 +59,7 @@ CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
 
 USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
 USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
-	$(MODE_INCLUDE) $(ARCH_USER_CFLAGS)
+	$(MODE_INCLUDE)
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
@@ -74,6 +69,8 @@ USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
 CFLAGS += -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask
 CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
 
+include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
+
 #This will adjust *FLAGS accordingly to the platform.
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
@@ -132,7 +129,7 @@ CPPFLAGS_vmlinux.lds = -U$(SUBARCH) \
 #The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
 
-CFLAGS_vmlinux = $(LINK-y) $(LINK_WRAPS)
+CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS)
 define cmd_vmlinux__
 	$(CC) $(CFLAGS_vmlinux) -o $@ \
 	-Wl,-T,$(vmlinux-lds) $(vmlinux-init) \
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
index 301059062a3e..93d0818fa816 100644
--- a/arch/um/Makefile-i386
+++ b/arch/um/Makefile-i386
@@ -1,4 +1,4 @@
-SUBARCH_CORE := arch/um/sys-i386/ arch/i386/crypto/
+core-y += arch/um/sys-i386/ arch/i386/crypto/
 
 TOP_ADDR := $(CONFIG_TOP_ADDR)
 
@@ -8,21 +8,32 @@ ifeq ($(CONFIG_MODE_SKAS),y)
   endif
 endif
 
+LDFLAGS			+= -m elf_i386
+ELF_ARCH		:= $(SUBARCH)
+ELF_FORMAT 		:= elf32-$(SUBARCH)
+OBJCOPYFLAGS  		:= -O binary -R .note -R .comment -S
+
+ifeq ("$(origin SUBARCH)", "command line")
+ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
+CFLAGS			+= $(call cc-option,-m32)
+USER_CFLAGS		+= $(call cc-option,-m32)
+HOSTCFLAGS		+= $(call cc-option,-m32)
+HOSTLDFLAGS		+= $(call cc-option,-m32)
+AFLAGS			+= $(call cc-option,-m32)
+LINK-y			+= $(call cc-option,-m32)
+UML_OBJCOPYFLAGS	+= -F $(ELF_FORMAT)
+
+export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS
+endif
+endif
+
 CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) $(STUB_CFLAGS)
-ARCH_USER_CFLAGS :=
 
 ifneq ($(CONFIG_GPROF),y)
 ARCH_CFLAGS += -DUM_FASTCALL
 endif
 
-ELF_ARCH := $(SUBARCH)
-ELF_FORMAT := elf32-$(SUBARCH)
-
-OBJCOPYFLAGS  := -O binary -R .note -R .comment -S
-
-SYS_UTIL_DIR	:= $(ARCH_DIR)/sys-i386/util
-
-SYS_HEADERS := $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
+SYS_HEADERS	:= $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
 
 prepare: $(SYS_HEADERS)
 
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
index d80bd0052e6b..aa2f7174ebca 100644
--- a/arch/um/Makefile-x86_64
+++ b/arch/um/Makefile-x86_64
@@ -1,11 +1,13 @@
 # Copyright 2003 - 2004 Pathscale, Inc
 # Released under the GPL
 
-SUBARCH_LIBS := arch/um/sys-x86_64/
+libs-y += arch/um/sys-x86_64/
 START := 0x60000000
 
+#We #undef __x86_64__ for kernelspace, not for userspace where
+#it's needed for headers to work!
 CFLAGS += -U__$(SUBARCH)__ -fno-builtin $(STUB_CFLAGS)
-ARCH_USER_CFLAGS := -D__x86_64__
+USER_CFLAGS += -fno-builtin
 
 ELF_ARCH := i386:x86-64
 ELF_FORMAT := elf64-x86-64
diff --git a/arch/um/scripts/Makefile.unmap b/arch/um/scripts/Makefile.unmap
index 37a8f9765295..802d027a1e13 100644
--- a/arch/um/scripts/Makefile.unmap
+++ b/arch/um/scripts/Makefile.unmap
@@ -12,8 +12,8 @@ $(obj)/unmap.o: _c_flags = $(call unprofile,$(CFLAGS))
 
 quiet_cmd_wrapld = LD      $@
 define cmd_wrapld
-	$(LD) -r -o $(obj)/unmap_tmp.o $< $(shell $(CC) -print-file-name=libc.a); \
-	$(OBJCOPY) $(obj)/unmap_tmp.o $@ -G switcheroo
+	$(LD) $(LDFLAGS) -r -o $(obj)/unmap_tmp.o $< $(shell $(CC) $(CFLAGS) -print-file-name=libc.a); \
+	$(OBJCOPY) $(UML_OBJCOPYFLAGS) $(obj)/unmap_tmp.o $@ -G switcheroo
 endef
 
 $(obj)/unmap_fin.o : $(obj)/unmap.o FORCE

From a0d43df931870997467042931e8fb7657375d327 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:43 -0700
Subject: [PATCH 090/113] [PATCH] uml: hostfs: unuse ROOT_DEV

Minimal patch removing uses of ROOT_DEV; next patch unexports it.  I've
opposed this, but I've planned to reintroduce the functionality without using
ROOT_DEV.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hostfs/hostfs_kern.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 4bf43ea87c46..88e68caa3784 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -15,7 +15,6 @@
 #include <linux/pagemap.h>
 #include <linux/blkdev.h>
 #include <linux/list.h>
-#include <linux/root_dev.h>
 #include <linux/statfs.h>
 #include <linux/kdev_t.h>
 #include <asm/uaccess.h>
@@ -160,8 +159,6 @@ static int read_name(struct inode *ino, char *name)
 	ino->i_size = i_size;
 	ino->i_blksize = i_blksize;
 	ino->i_blocks = i_blocks;
-	if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
-		ino->i_uid = 0;
 	return(0);
 }
 
@@ -841,16 +838,10 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 		attrs.ia_mode = attr->ia_mode;
 	}
 	if(attr->ia_valid & ATTR_UID){
-		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
-		   (attr->ia_uid == 0))
-			attr->ia_uid = getuid();
 		attrs.ia_valid |= HOSTFS_ATTR_UID;
 		attrs.ia_uid = attr->ia_uid;
 	}
 	if(attr->ia_valid & ATTR_GID){
-		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
-		   (attr->ia_gid == 0))
-			attr->ia_gid = getgid();
 		attrs.ia_valid |= HOSTFS_ATTR_GID;
 		attrs.ia_gid = attr->ia_gid;
 	}

From a2d2b5cb8e5d8787f009eeb3bbca52687bd42a77 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 14 Jul 2005 00:33:44 -0700
Subject: [PATCH 091/113] [PATCH] remove EXPORT_SYMBOL for root_dev

Remove ROOT_DEV after unexporting it in the previous patch, as requested time
ago by Christoph Hellwig.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 init/do_mounts.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1b02be734ccc..4e11a9aaf14a 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -26,8 +26,6 @@ static char __initdata saved_root_name[64];
 /* this is initialized in init/main.c */
 dev_t ROOT_DEV;
 
-EXPORT_SYMBOL(ROOT_DEV);
-
 static int __init load_ramdisk(char *str)
 {
 	rd_doload = simple_strtol(str,NULL,0) & 3;

From 98848fa83bb6a4572ad55daeb312182dec9bed58 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Thu, 14 Jul 2005 00:33:45 -0700
Subject: [PATCH 092/113] [PATCH] ratelimit the ieee1394 IR legacy activated
 messages

running coriander1 with an Apple iSight produces lots of dmesg output.

Jul 13 22:14:17 ibook kernel: ieee1394: raw1394: /dev/raw1394 device initialized
Jul 13 22:15:28 ibook kernel: ohci1394: fw-host0: IR legacy activated
Jul 13 22:15:59 ibook last message repeated 208 times
Jul 13 22:17:00 ibook last message repeated 762 times
Jul 13 22:18:01 ibook last message repeated 914 times
Jul 13 22:18:17 ibook last message repeated 238 times
Jul 13 22:18:17 ibook kernel: ieee1394: unsolicited response packet received - no tlabel match
Jul 13 22:18:17 ibook kernel: ohci1394: fw-host0: IR legacy activated

its less noisy with the patch:

Jul 14 08:03:08 ibook kernel: ieee1394: raw1394: /dev/raw1394 device initialized
Jul 14 08:03:26 ibook kernel: ohci1394: fw-host0: IR legacy activated
Jul 14 08:03:42 ibook last message repeated 10 times
Jul 14 08:03:47 ibook kernel: printk: 63 messages suppressed.
Jul 14 08:03:47 ibook kernel: ohci1394: fw-host0: IR legacy activated
Jul 14 08:03:52 ibook kernel: printk: 74 messages suppressed.

Signed-off-by: Olaf Hering <olh@suse.de>
Cc: Jody McIntyre <scjody@modernduck.com>
Cc: Ben Collins <bcollins@debian.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/ieee1394/ohci1394.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index a485f47bb21e..b12a970cc9a3 100644
--- a/drivers/ieee1394/ohci1394.c
+++ b/drivers/ieee1394/ohci1394.c
@@ -1084,7 +1084,8 @@ static int ohci_devctl(struct hpsb_host *host, enum devctl_cmd cmd, int arg)
 
 			initialize_dma_rcv_ctx(&ohci->ir_legacy_context, 1);
 
-			PRINT(KERN_ERR, "IR legacy activated");
+			if (printk_ratelimit())
+				PRINT(KERN_ERR, "IR legacy activated");
 		}
 
                 spin_lock_irqsave(&ohci->IR_channel_lock, flags);

From ba460e48064edeb57e3398eb8972c58de33f11ea Mon Sep 17 00:00:00 2001
From: Matthias Urlichs <smurf@smurf.noris.de>
Date: Thu, 14 Jul 2005 00:33:47 -0700
Subject: [PATCH 093/113] [PATCH] Option Card driver update, Maintainer entry

This patch updates the Option Card driver:
- remove a deadlock
- add sponsor notice
- add new card
- renamed the device to what's usually printed on it
- removed some dead code
- clean up a bunch of irregular whitespace (end-of-line, tabs)

Also add a MAINTAINERS entry for the Option Card driver.

Signed-Off-By: Matthias Urlichs <smurf@smurf.noris.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 MAINTAINERS                 |   6 +
 drivers/usb/serial/option.c | 226 ++++++++++++++++++------------------
 2 files changed, 117 insertions(+), 115 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d014725901c..f73381af4d5e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2420,6 +2420,12 @@ L:	linux-usb-users@lists.sourceforge.net
 L:	linux-usb-devel@lists.sourceforge.net
 S:	Maintained
 
+USB OPTION-CARD DRIVER
+P:	Matthias Urlichs
+M:	smurf@smurf.noris.de
+L:	linux-usb-devel@lists.sourceforge.net
+S:	Maintained
+
 USB OV511 DRIVER
 P:	Mark McClelland
 M:	mmcclell@bigfoot.com
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index b722175f108f..e9256408757f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -12,14 +12,25 @@
   History:
 
   2005-05-19  v0.1   Initial version, based on incomplete docs
-                     and analysis of misbehavior of the standard driver
+                     and analysis of misbehavior with the standard driver
   2005-05-20  v0.2   Extended the input buffer to avoid losing
                      random 64-byte chunks of data
   2005-05-21  v0.3   implemented chars_in_buffer()
                      turned on low_latency
                      simplified the code somewhat
+  2005-05-24  v0.4   option_write() sometimes deadlocked under heavy load
+                     removed some dead code
+                     added sponsor notice
+                     coding style clean-up
+  2005-06-20  v0.4.1 add missing braces :-/
+                     killed end-of-line whitespace
+  2005-07-15  v0.4.2 rename WLAN product to FUSION, add FUSION2
+
+  Work sponsored by: Sigos GmbH, Germany <info@sigos.de>
+
 */
-#define DRIVER_VERSION "v0.3"
+
+#define DRIVER_VERSION "v0.4"
 #define DRIVER_AUTHOR "Matthias Urlichs <smurf@smurf.noris.de>"
 #define DRIVER_DESC "Option Card (PC-Card to) USB to Serial Driver"
 
@@ -44,7 +55,6 @@ static int  option_write_room (struct usb_serial_port *port);
 
 static void option_instat_callback(struct urb *urb, struct pt_regs *regs);
 
-
 static int  option_write (struct usb_serial_port *port,
                           const unsigned char *buf, int count);
 
@@ -60,14 +70,17 @@ static int  option_tiocmset (struct usb_serial_port *port, struct file *file,
 static int  option_send_setup (struct usb_serial_port *port);
 
 /* Vendor and product IDs */
-#define OPTION_VENDOR_ID		0x0AF0
+#define OPTION_VENDOR_ID			0x0AF0
+
+#define OPTION_PRODUCT_OLD		0x5000
+#define OPTION_PRODUCT_FUSION	0x6000
+#define OPTION_PRODUCT_FUSION2	0x6300
 
-#define	OPTION_PRODUCT_OLD		0x5000
-#define	OPTION_PRODUCT_WLAN		0x6000
 
 static struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_OLD) },
-	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_WLAN) },
+	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION) },
+	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION2) },
 	{ } /* Terminating entry */
 };
 
@@ -85,58 +98,62 @@ static struct usb_driver option_driver = {
  * recognizes separately, thus num_port=1.
  */
 static struct usb_serial_device_type option_3port_device = {
-	.owner			= THIS_MODULE,
-	.name			= "Option 3-port card",
-	.short_name		= "option",
-	.id_table		= option_ids,
-	.num_interrupt_in	= NUM_DONT_CARE,
-	.num_bulk_in		= NUM_DONT_CARE,
-	.num_bulk_out		= NUM_DONT_CARE,
-	.num_ports		= 1, /* 3 */
-	.open			= option_open,
-	.close			= option_close,
-	.write			= option_write,
-	.write_room		= option_write_room,
-	.chars_in_buffer	= option_chars_in_buffer,
-	.throttle		= option_rx_throttle,
-	.unthrottle		= option_rx_unthrottle,
-	.ioctl			= option_ioctl,
-	.set_termios		= option_set_termios,
-	.break_ctl		= option_break_ctl,
-	.tiocmget		= option_tiocmget,
-	.tiocmset		= option_tiocmset,
-	.attach			= option_startup,
-	.shutdown		= option_shutdown,
-	.read_int_callback	= option_instat_callback,
+	.owner             = THIS_MODULE,
+	.name              = "Option 3G data card",
+	.short_name        = "option",
+	.id_table          = option_ids,
+	.num_interrupt_in  = NUM_DONT_CARE,
+	.num_bulk_in       = NUM_DONT_CARE,
+	.num_bulk_out      = NUM_DONT_CARE,
+	.num_ports         = 1, /* 3, but the card reports its ports separately */
+	.open              = option_open,
+	.close             = option_close,
+	.write             = option_write,
+	.write_room        = option_write_room,
+	.chars_in_buffer   = option_chars_in_buffer,
+	.throttle          = option_rx_throttle,
+	.unthrottle        = option_rx_unthrottle,
+	.ioctl             = option_ioctl,
+	.set_termios       = option_set_termios,
+	.break_ctl         = option_break_ctl,
+	.tiocmget          = option_tiocmget,
+	.tiocmset          = option_tiocmset,
+	.attach            = option_startup,
+	.shutdown          = option_shutdown,
+	.read_int_callback = option_instat_callback,
 };
 
+#ifdef CONFIG_USB_DEBUG
 static int debug;
+#else
+#define debug 0
+#endif
+
 
 /* per port private data */
 
-#define N_IN_URB	4
-#define N_OUT_URB	1
-#define IN_BUFLEN	1024
-#define OUT_BUFLEN	1024
+#define N_IN_URB 4
+#define N_OUT_URB 1
+#define IN_BUFLEN 1024
+#define OUT_BUFLEN 128
 
 struct option_port_private {
 	/* Input endpoints and buffer for this port */
-	struct urb	*in_urbs[N_IN_URB];
-	char		in_buffer[N_IN_URB][IN_BUFLEN];
+	struct urb *in_urbs[N_IN_URB];
+	char in_buffer[N_IN_URB][IN_BUFLEN];
 	/* Output endpoints and buffer for this port */
-	struct urb	*out_urbs[N_OUT_URB];
-	char		out_buffer[N_OUT_URB][OUT_BUFLEN];
+	struct urb *out_urbs[N_OUT_URB];
+	char out_buffer[N_OUT_URB][OUT_BUFLEN];
 
 	/* Settings for the port */
-	int		rts_state;	/* Handshaking pins (outputs) */
-	int		dtr_state;
-	int		cts_state;	/* Handshaking pins (inputs) */
-	int		dsr_state;
-	int		dcd_state;
-	int		ri_state;
-	// int		break_on;
+	int rts_state;	/* Handshaking pins (outputs) */
+	int dtr_state;
+	int cts_state;	/* Handshaking pins (inputs) */
+	int dsr_state;
+	int dcd_state;
+	int ri_state;
 
-	unsigned long	tx_start_time[N_OUT_URB];
+	unsigned long tx_start_time[N_OUT_URB];
 };
 
 
@@ -190,13 +207,13 @@ static void
 option_break_ctl (struct usb_serial_port *port, int break_state)
 {
 	/* Unfortunately, I don't know how to send a break */
- 	dbg("%s", __FUNCTION__);
+	dbg("%s", __FUNCTION__);
 }
 
 
 static void
 option_set_termios (struct usb_serial_port *port,
-				     struct termios *old_termios)
+                    struct termios *old_termios)
 {
 	dbg("%s", __FUNCTION__);
 
@@ -204,10 +221,10 @@ option_set_termios (struct usb_serial_port *port,
 }
 
 static int
-option_tiocmget(struct usb_serial_port *port, struct file *file)
+option_tiocmget (struct usb_serial_port *port, struct file *file)
 {
-	unsigned int			value;
-	struct option_port_private 	*portdata;
+	unsigned int value;
+	struct option_port_private *portdata;
 
 	portdata = usb_get_serial_port_data(port);
 
@@ -225,7 +242,7 @@ static int
 option_tiocmset (struct usb_serial_port *port, struct file *file,
                  unsigned int set, unsigned int clear)
 {
-	struct option_port_private 	*portdata;
+	struct option_port_private *portdata;
 
 	portdata = usb_get_serial_port_data(port);
 
@@ -250,71 +267,50 @@ option_ioctl (struct usb_serial_port *port, struct file *file,
 
 /* Write */
 static int
-option_write(struct usb_serial_port *port,
-			 const unsigned char *buf, int count)
+option_write (struct usb_serial_port *port,
+			  const unsigned char *buf, int count)
 {
-	struct option_port_private 	*portdata;
-	int				i;
-	int 				left, todo;
-	struct urb			*this_urb = NULL; /* spurious */
- 	int 				err;
+	struct option_port_private *portdata;
+	int i;
+	int left, todo;
+	struct urb *this_urb = NULL; /* spurious */
+	int err;
 
 	portdata = usb_get_serial_port_data(port);
 
 	dbg("%s: write (%d chars)", __FUNCTION__, count);
 
-#if 0
-	spin_lock(&port->lock);
-	if (port->write_urb_busy) {
-		spin_unlock(&port->lock);
-		dbg("%s: already writing", __FUNCTION__);
-		return 0;
-	}
-	port->write_urb_busy = 1;
-	spin_unlock(&port->lock);
-#endif
-
 	i = 0;
 	left = count;
-	while (left>0) {
+	for (i=0; left > 0 && i < N_OUT_URB; i++) {
 		todo = left;
 		if (todo > OUT_BUFLEN)
 			todo = OUT_BUFLEN;
 
-		for (;i < N_OUT_URB; i++) {
-			/* Check we have a valid urb/endpoint before we use it... */
-			this_urb = portdata->out_urbs[i];
-			if (this_urb->status != -EINPROGRESS)
-				break;
+		this_urb = portdata->out_urbs[i];
+		if (this_urb->status == -EINPROGRESS) {
 			if (this_urb->transfer_flags & URB_ASYNC_UNLINK)
 				continue;
 			if (time_before(jiffies, portdata->tx_start_time[i] + 10 * HZ))
 				continue;
 			this_urb->transfer_flags |= URB_ASYNC_UNLINK;
 			usb_unlink_urb(this_urb);
+			continue;
 		}
-
-		if (i == N_OUT_URB) {
-			/* no bulk out free! */
-			dbg("%s: no output urb -- left %d", __FUNCTION__,count-left);
-#if 0
-			port->write_urb_busy = 0;
-#endif
-			return count-left;
-		}
+		if (this_urb->status != 0)
+			dbg("usb_write %p failed (err=%d)", this_urb, this_urb->status);
 
 		dbg("%s: endpoint %d buf %d", __FUNCTION__, usb_pipeendpoint(this_urb->pipe), i);
 
+		/* send the data */
 		memcpy (this_urb->transfer_buffer, buf, todo);
-
-		/* send the data out the bulk port */
 		this_urb->transfer_buffer_length = todo;
 
 		this_urb->transfer_flags &= ~URB_ASYNC_UNLINK;
 		this_urb->dev = port->serial->dev;
 		err = usb_submit_urb(this_urb, GFP_ATOMIC);
 		if (err) {
-			dbg("usb_submit_urb %p (write bulk) failed (%d,, has %d)", this_urb, err, this_urb->status);
+			dbg("usb_submit_urb %p (write bulk) failed (%d, has %d)", this_urb, err, this_urb->status);
 			continue;
 		}
 		portdata->tx_start_time[i] = jiffies;
@@ -323,9 +319,6 @@ option_write(struct usb_serial_port *port,
 	}
 
 	count -= left;
-#if 0
-	port->write_urb_busy = 0;
-#endif
 	dbg("%s: wrote (did %d)", __FUNCTION__, count);
 	return count;
 }
@@ -333,7 +326,7 @@ option_write(struct usb_serial_port *port,
 static void
 option_indat_callback (struct urb *urb, struct pt_regs *regs)
 {
-	int	i, err;
+	int i, err;
 	int endpoint;
 	struct usb_serial_port *port;
 	struct tty_struct *tty;
@@ -444,10 +437,11 @@ option_write_room (struct usb_serial_port *port)
 
 	portdata = usb_get_serial_port_data(port);
 
-	for (i=0; i < N_OUT_URB; i++)
+	for (i=0; i < N_OUT_URB; i++) {
 		this_urb = portdata->out_urbs[i];
 		if (this_urb && this_urb->status != -EINPROGRESS)
 			data_len += OUT_BUFLEN;
+	}
 
 	dbg("%s: %d", __FUNCTION__, data_len);
 	return data_len;
@@ -464,11 +458,11 @@ option_chars_in_buffer (struct usb_serial_port *port)
 
 	portdata = usb_get_serial_port_data(port);
 
-	for (i=0; i < N_OUT_URB; i++)
+	for (i=0; i < N_OUT_URB; i++) {
 		this_urb = portdata->out_urbs[i];
 		if (this_urb && this_urb->status == -EINPROGRESS)
 			data_len += this_urb->transfer_buffer_length;
-
+	}
 	dbg("%s: %d", __FUNCTION__, data_len);
 	return data_len;
 }
@@ -477,10 +471,10 @@ option_chars_in_buffer (struct usb_serial_port *port)
 static int
 option_open (struct usb_serial_port *port, struct file *filp)
 {
-	struct option_port_private 	*portdata;
-	struct usb_serial 		*serial = port->serial;
-	int				i, err;
-	struct urb			*urb;
+	struct option_port_private *portdata;
+	struct usb_serial *serial = port->serial;
+	int i, err;
+	struct urb *urb;
 
 	portdata = usb_get_serial_port_data(port);
 
@@ -528,7 +522,7 @@ option_open (struct usb_serial_port *port, struct file *filp)
 }
 
 static inline void
-stop_urb(struct urb *urb)
+stop_urb (struct urb *urb)
 {
 	if (urb && urb->status == -EINPROGRESS) {
 		urb->transfer_flags &= ~URB_ASYNC_UNLINK;
@@ -537,11 +531,11 @@ stop_urb(struct urb *urb)
 }
 
 static void
-option_close(struct usb_serial_port *port, struct file *filp)
+option_close (struct usb_serial_port *port, struct file *filp)
 {
-	int			i;
-	struct usb_serial	*serial = port->serial;
-	struct option_port_private 	*portdata;
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct option_port_private *portdata;
 
 	dbg("%s", __FUNCTION__);
 	portdata = usb_get_serial_port_data(port);
@@ -589,11 +583,11 @@ option_setup_urb (struct usb_serial *serial, int endpoint,
 
 /* Setup urbs */
 static void
-option_setup_urbs(struct usb_serial *serial)
+option_setup_urbs (struct usb_serial *serial)
 {
-	int				j;
-	struct usb_serial_port		*port;
-	struct option_port_private	*portdata;
+	int j;
+	struct usb_serial_port *port;
+	struct option_port_private *portdata;
 
 	dbg("%s", __FUNCTION__);
 
@@ -617,7 +611,7 @@ option_setup_urbs(struct usb_serial *serial)
 
 
 static int
-option_send_setup(struct usb_serial_port *port)
+option_send_setup (struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
@@ -644,9 +638,9 @@ option_send_setup(struct usb_serial_port *port)
 static int
 option_startup (struct usb_serial *serial)
 {
-	int				i, err;
-	struct usb_serial_port		*port;
-	struct option_port_private	*portdata;
+	int i, err;
+	struct usb_serial_port *port;
+	struct option_port_private *portdata;
 
 	dbg("%s", __FUNCTION__);
 
@@ -677,9 +671,9 @@ option_startup (struct usb_serial *serial)
 static void
 option_shutdown (struct usb_serial *serial)
 {
-	int				i, j;
-	struct usb_serial_port		*port;
-	struct option_port_private	*portdata;
+	int i, j;
+	struct usb_serial_port *port;
+	struct option_port_private *portdata;
 
 	dbg("%s", __FUNCTION__);
 
@@ -724,6 +718,8 @@ MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL");
 
+#ifdef CONFIG_USB_DEBUG
 module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(debug, "Debug messages");
+#endif
 

From 46906c4415f88cebfad530917bada0835d651824 Mon Sep 17 00:00:00 2001
From: Ian Wienand <ianw@gelato.unsw.edu.au>
Date: Wed, 13 Jul 2005 21:09:00 -0700
Subject: [PATCH 094/113] [IA64] Fix undefined reference to can_cpei_retarget
 for simulator

The simulator build doesn't turn on ACPI, so doesn't have a definition
of can_cpei_retarget.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/topology.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index d8030f3bd865..92ff46ad21e2 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,12 +36,14 @@ int arch_register_cpu(int num)
 	parent = &sysfs_nodes[cpu_to_node(num)];
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_ACPI_BOOT
 	/*
 	 * If CPEI cannot be re-targetted, and this is
 	 * CPEI target, then dont create the control file
 	 */
 	if (!can_cpei_retarget() && is_cpu_cpei_target(num))
 		sysfs_cpus[num].cpu.no_control = 1;
+#endif
 
 	return register_cpu(&sysfs_cpus[num].cpu, num, parent);
 }

From 5d157885f383ccc0660c011fa488ae4edb77ab16 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@tglx.tec.linutronix.de>
Date: Fri, 15 Jul 2005 08:14:44 +0200
Subject: [PATCH 095/113] [JFFS2] Fix node allocation leak

In the rare case of failing to write the cleanmarker
the allocated node was not freed.

Pointed out by Forrest Zhao
Initial cleanup by Joern Engel

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jffs2/erase.c | 182 +++++++++++++++++++++++++----------------------
 1 file changed, 96 insertions(+), 86 deletions(-)

diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 6a4c0a3685da..787d84ac2bcd 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: erase.c,v 1.76 2005/05/03 15:11:40 dedekind Exp $
+ * $Id: erase.c,v 1.80 2005/07/14 19:46:24 joern Exp $
  *
  */
 
@@ -300,100 +300,86 @@ static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_erase
 	jeb->last_node = NULL;
 }
 
+static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset)
+{
+	void *ebuf;
+	uint32_t ofs;
+	size_t retlen;
+	int ret = -EIO;
+	
+	ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!ebuf) {
+		printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset);
+		return -EAGAIN;
+	}
+
+	D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset));
+
+	for (ofs = jeb->offset; ofs < jeb->offset + c->sector_size; ) {
+		uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
+		int i;
+
+		*bad_offset = ofs;
+
+		ret = jffs2_flash_read(c, ofs, readlen, &retlen, ebuf);
+		if (ret) {
+			printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
+			goto fail;
+		}
+		if (retlen != readlen) {
+			printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen);
+			goto fail;
+		}
+		for (i=0; i<readlen; i += sizeof(unsigned long)) {
+			/* It's OK. We know it's properly aligned */
+			unsigned long *datum = ebuf + i;
+			if (*datum + 1) {
+				*bad_offset += i;
+				printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset);
+				goto fail;
+			}
+		}
+		ofs += readlen;
+		cond_resched();
+	}
+	ret = 0;
+fail:
+	kfree(ebuf);
+	return ret;
+}
+
 static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
 {
 	struct jffs2_raw_node_ref *marker_ref = NULL;
-	unsigned char *ebuf;
 	size_t retlen;
 	int ret;
 	uint32_t bad_offset;
 
-	if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0)) {
-		marker_ref = jffs2_alloc_raw_node_ref();
-		if (!marker_ref) {
-			printk(KERN_WARNING "Failed to allocate raw node ref for clean marker\n");
-			/* Stick it back on the list from whence it came and come back later */
-			jffs2_erase_pending_trigger(c);
-			spin_lock(&c->erase_completion_lock);
-			list_add(&jeb->list, &c->erase_complete_list);
-			spin_unlock(&c->erase_completion_lock);
-			return;
-		}
+	switch (jffs2_block_check_erase(c, jeb, &bad_offset)) {
+	case -EAGAIN:	goto refile;
+	case -EIO:	goto filebad;
 	}
-	ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!ebuf) {
-		printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Assuming it worked\n", jeb->offset);
-	} else {
-		uint32_t ofs = jeb->offset;
-
-		D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset));
-		while(ofs < jeb->offset + c->sector_size) {
-			uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
-			int i;
-
-			bad_offset = ofs;
-
-			ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf);
-
-			if (ret) {
-				printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
-				goto bad;
-			}
-			if (retlen != readlen) {
-				printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen);
-				goto bad;
-			}
-			for (i=0; i<readlen; i += sizeof(unsigned long)) {
-				/* It's OK. We know it's properly aligned */
-				unsigned long datum = *(unsigned long *)(&ebuf[i]);
-				if (datum + 1) {
-					bad_offset += i;
-					printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", datum, bad_offset);
-				bad: 
-					if ((!jffs2_cleanmarker_oob(c)) && (c->cleanmarker_size > 0))
-						jffs2_free_raw_node_ref(marker_ref);
-					kfree(ebuf);
-				bad2:
-					spin_lock(&c->erase_completion_lock);
-					/* Stick it on a list (any list) so
-					   erase_failed can take it right off
-					   again.  Silly, but shouldn't happen
-					   often. */
-					list_add(&jeb->list, &c->erasing_list);
-					spin_unlock(&c->erase_completion_lock);
-					jffs2_erase_failed(c, jeb, bad_offset);
-					return;
-				}
-			}
-			ofs += readlen;
-			cond_resched();
-		}
-		kfree(ebuf);
-	}
-
-	bad_offset = jeb->offset;
 
 	/* Write the erase complete marker */	
 	D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset));
-	if (jffs2_cleanmarker_oob(c)) {
+	bad_offset = jeb->offset;
+
+	/* Cleanmarker in oob area or no cleanmarker at all ? */
+	if (jffs2_cleanmarker_oob(c) || c->cleanmarker_size == 0) {
+
+		if (jffs2_cleanmarker_oob(c)) {
+			if (jffs2_write_nand_cleanmarker(c, jeb))
+				goto filebad;
+		}
 
-		if (jffs2_write_nand_cleanmarker(c, jeb))
-			goto bad2;
-			
 		jeb->first_node = jeb->last_node = NULL;
-
 		jeb->free_size = c->sector_size;
 		jeb->used_size = 0;
 		jeb->dirty_size = 0;
 		jeb->wasted_size = 0;
-	} else if (c->cleanmarker_size == 0) {
-		jeb->first_node = jeb->last_node = NULL;
 
-		jeb->free_size = c->sector_size;
-		jeb->used_size = 0;
-		jeb->dirty_size = 0;
-		jeb->wasted_size = 0;
 	} else {
+
 		struct kvec vecs[1];
 		struct jffs2_unknown_node marker = {
 			.magic =	cpu_to_je16(JFFS2_MAGIC_BITMASK),
@@ -401,21 +387,28 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
 			.totlen =	cpu_to_je32(c->cleanmarker_size)
 		};
 
+		marker_ref = jffs2_alloc_raw_node_ref();
+		if (!marker_ref) {
+			printk(KERN_WARNING "Failed to allocate raw node ref for clean marker. Refiling\n");
+			goto refile;
+		}
+
 		marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4));
 
 		vecs[0].iov_base = (unsigned char *) &marker;
 		vecs[0].iov_len = sizeof(marker);
 		ret = jffs2_flash_direct_writev(c, vecs, 1, jeb->offset, &retlen);
 		
-		if (ret) {
-			printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n",
-			       jeb->offset, ret);
-			goto bad2;
-		}
-		if (retlen != sizeof(marker)) {
-			printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
-			       jeb->offset, sizeof(marker), retlen);
-			goto bad2;
+		if (ret || retlen != sizeof(marker)) {
+			if (ret)
+				printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n",
+				       jeb->offset, ret);
+			else
+				printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
+				       jeb->offset, sizeof(marker), retlen);
+
+			jffs2_free_raw_node_ref(marker_ref);
+			goto filebad;
 		}
 
 		marker_ref->next_in_ino = NULL;
@@ -444,5 +437,22 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
 	c->nr_free_blocks++;
 	spin_unlock(&c->erase_completion_lock);
 	wake_up(&c->erase_wait);
-}
+	return;
 
+filebad:
+	spin_lock(&c->erase_completion_lock);
+	/* Stick it on a list (any list) so erase_failed can take it
+	   right off again.  Silly, but shouldn't happen often. */
+	list_add(&jeb->list, &c->erasing_list);
+	spin_unlock(&c->erase_completion_lock);
+	jffs2_erase_failed(c, jeb, bad_offset);
+	return;
+
+refile:
+	/* Stick it back on the list from whence it came and come back later */
+	jffs2_erase_pending_trigger(c);
+	spin_lock(&c->erase_completion_lock);
+	list_add(&jeb->list, &c->erase_complete_list);
+	spin_unlock(&c->erase_completion_lock);
+	return;
+}

From 030babac6ae54df64ae3bba4685ecb1d8d8dd8c3 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Fri, 15 Jul 2005 03:56:25 -0700
Subject: [PATCH 096/113] [PATCH] vt.c build fix

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/vt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index d7aa7a29f67e..30d96739fb23 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2796,7 +2796,7 @@ void do_blank_screen(int entering_gfx)
 		return;
 
 	if (vesa_off_interval) {
-		blank_state = blank_vesa_wait,
+		blank_state = blank_vesa_wait;
 		mod_timer(&console_timer, jiffies + vesa_off_interval);
 	}
 

From 1eb29128c644581fa51f822545921394ad4f719f Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@cse.unsw.edu.au>
Date: Fri, 15 Jul 2005 03:56:27 -0700
Subject: [PATCH 097/113] [PATCH] Fix raid0's attempt to divide by 64bit
 numbers

Apparently sector_div is only guaranteed to work with a 32bit divisor, even
on 64bit architectures.  So allow for this in raid0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid0.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e11dd14d0b43..2120710172c5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -314,16 +314,16 @@ static int raid0_run (mddev_t *mddev)
 		sector_t space = conf->hash_spacing;
 		int round;
 		conf->preshift = 0;
-		if (sizeof(sector_t) > sizeof(unsigned long)) {
+		if (sizeof(sector_t) > sizeof(u32)) {
 			/*shift down space and s so that sector_div will work */
-			while (space > (sector_t) (~(unsigned long)0)) {
+			while (space > (sector_t) (~(u32)0)) {
 				s >>= 1;
 				space >>= 1;
 				s += 1; /* force round-up */
 				conf->preshift++;
 			}
 		}
-		round = sector_div(s, (unsigned long)space) ? 1 : 0;
+		round = sector_div(s, (u32)space) ? 1 : 0;
 		nb_zone = s + round;
 	}
 	printk("raid0 : nb_zone is %d.\n", nb_zone);
@@ -443,7 +443,7 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
 		volatile
 #endif
 		sector_t x = block >> conf->preshift;
-		sector_div(x, (unsigned long)conf->hash_spacing);
+		sector_div(x, (u32)conf->hash_spacing);
 		zone = conf->hash_table[x];
 	}
  

From c5287ba132ff742e595d42c28b66cbba19522c4e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Fri, 15 Jul 2005 03:56:28 -0700
Subject: [PATCH 098/113] [PATCH] v4l: bug fixes for tuner, cx88 and tea5767

- In CX88 code, some cards needs to have audio reprogramed after changing
  video channel;

- Tuner autodetection code seems not to work on some cards.  Now,
  no_autodetect insmod option allows disabling autodetection code;

- Minor fixes in tea5767 to reduce integer trunc;

- There are some new Pixelview Ultra Pro cards that doesn't use TEA5767
  for radio.  As autodetection is capable of checking for tea, radio tuners
  and addresses removed.

- CX88 version number incremented.

Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/video/cx88/cx88-cards.c |  8 +++----
 drivers/media/video/cx88/cx88-dvb.c   |  2 +-
 drivers/media/video/cx88/cx88-video.c |  7 +++++-
 drivers/media/video/cx88/cx88.h       |  6 ++---
 drivers/media/video/tea5767.c         | 34 +++++++++++++++------------
 drivers/media/video/tuner-core.c      | 31 ++++++++++++++----------
 6 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c
index b0b47c3cde3c..3d0c784b376f 100644
--- a/drivers/media/video/cx88/cx88-cards.c
+++ b/drivers/media/video/cx88/cx88-cards.c
@@ -1,5 +1,5 @@
 /*
- * $Id: cx88-cards.c,v 1.85 2005/07/04 19:35:05 mkrufky Exp $
+ * $Id: cx88-cards.c,v 1.86 2005/07/14 03:06:43 mchehab Exp $
  *
  * device driver for Conexant 2388x based TV cards
  * card-specific stuff.
@@ -682,9 +682,9 @@ struct cx88_board cx88_boards[] = {
 		.name           = "PixelView PlayTV Ultra Pro (Stereo)",
 		/* May be also TUNER_YMEC_TVF_5533MF for NTSC/M or PAL/M */
 		.tuner_type     = TUNER_PHILIPS_FM1216ME_MK3,
-		.radio_type     = TUNER_TEA5767,
-		.tuner_addr	= 0xc2>>1,
-		.radio_addr	= 0xc0>>1,
+		.radio_type     = UNSET,
+		.tuner_addr	= ADDR_UNSET,
+		.radio_addr	= ADDR_UNSET,
 		.input          = {{
 			.type   = CX88_VMUX_TELEVISION,
 			.vmux   = 0,
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index 8db68f2d1351..00ca40a129b9 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -1,5 +1,5 @@
 /*
- * $Id: cx88-dvb.c,v 1.41 2005/07/04 19:35:05 mkrufky Exp $
+ * $Id: cx88-dvb.c,v 1.42 2005/07/12 15:44:55 mkrufky Exp $
  *
  * device driver for Conexant 2388x based TV cards
  * MPEG Transport Stream (DVB) routines
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index c44a079d08c0..5588a3aeecb4 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1,5 +1,5 @@
 /*
- * $Id: cx88-video.c,v 1.79 2005/07/07 14:17:47 mchehab Exp $
+ * $Id: cx88-video.c,v 1.80 2005/07/13 08:49:08 mchehab Exp $
  *
  * device driver for Conexant 2388x based TV cards
  * video4linux video interface
@@ -1346,6 +1346,11 @@ static int video_do_ioctl(struct inode *inode, struct file *file,
 		dev->freq = f->frequency;
 		cx88_newstation(core);
 		cx88_call_i2c_clients(dev->core,VIDIOC_S_FREQUENCY,f);
+
+		/* When changing channels it is required to reset TVAUDIO */
+		msleep (10);
+		cx88_set_tvaudio(core);
+
 		up(&dev->lock);
 		return 0;
 	}
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index 307beae04f2a..b008f7db6dfd 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -1,5 +1,5 @@
 /*
- * $Id: cx88.h,v 1.68 2005/07/07 14:17:47 mchehab Exp $
+ * $Id: cx88.h,v 1.69 2005/07/13 17:25:25 mchehab Exp $
  *
  * v4l2 device driver for cx2388x based TV cards
  *
@@ -35,8 +35,8 @@
 #include "btcx-risc.h"
 #include "cx88-reg.h"
 
-#include <linux/version.h>
-#define CX88_VERSION_CODE KERNEL_VERSION(0,0,4)
+#include <linux/utsname.h>
+#define CX88_VERSION_CODE KERNEL_VERSION(0,0,5)
 
 #ifndef TRUE
 # define TRUE (1==1)
diff --git a/drivers/media/video/tea5767.c b/drivers/media/video/tea5767.c
index b53c748caf2a..4d27ac1b7fb8 100644
--- a/drivers/media/video/tea5767.c
+++ b/drivers/media/video/tea5767.c
@@ -2,7 +2,7 @@
  * For Philips TEA5767 FM Chip used on some TV Cards like Prolink Pixelview
  * I2C address is allways 0xC0.
  *
- * $Id: tea5767.c,v 1.18 2005/07/07 03:02:55 mchehab Exp $
+ * $Id: tea5767.c,v 1.21 2005/07/14 03:06:43 mchehab Exp $
  *
  * Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@brturbo.com.br)
  * This code is placed under the terms of the GNU General Public License
@@ -153,17 +153,17 @@ static void tea5767_status_dump(unsigned char *buffer)
 
 	switch (TEA5767_HIGH_LO_32768) {
 	case TEA5767_HIGH_LO_13MHz:
-		frq = 1000 * (div * 50 - 700 - 225) / 4;	/* Freq in KHz */
+		frq = (div * 50000 - 700000 - 225000) / 4;	/* Freq in KHz */
 		break;
 	case TEA5767_LOW_LO_13MHz:
-		frq = 1000 * (div * 50 + 700 + 225) / 4;	/* Freq in KHz */
+		frq = (div * 50000 + 700000 + 225000) / 4;	/* Freq in KHz */
 		break;
 	case TEA5767_LOW_LO_32768:
-		frq = 1000 * (div * 32768 / 1000 + 700 + 225) / 4;	/* Freq in KHz */
+		frq = (div * 32768 + 700000 + 225000) / 4;	/* Freq in KHz */
 		break;
 	case TEA5767_HIGH_LO_32768:
 	default:
-		frq = 1000 * (div * 32768 / 1000 - 700 - 225) / 4;	/* Freq in KHz */
+		frq = (div * 32768 - 700000 - 225000) / 4;	/* Freq in KHz */
 		break;
 	}
 	buffer[0] = (div >> 8) & 0x3f;
@@ -196,7 +196,7 @@ static void set_radio_freq(struct i2c_client *c, unsigned int frq)
 	unsigned div;
 	int rc;
 
-	tuner_dbg (PREFIX "radio freq counter %d\n", frq);
+	tuner_dbg (PREFIX "radio freq = %d.%03d MHz\n", frq/16000,(frq/16)%1000);
 
 	/* Rounds freq to next decimal value - for 62.5 KHz step */
 	/* frq = 20*(frq/16)+radio_frq[frq%16]; */
@@ -224,19 +224,19 @@ static void set_radio_freq(struct i2c_client *c, unsigned int frq)
 		tuner_dbg ("TEA5767 radio HIGH LO inject xtal @ 13 MHz\n");
 		buffer[2] |= TEA5767_HIGH_LO_INJECT;
 		buffer[4] |= TEA5767_PLLREF_ENABLE;
-		div = (frq * 4 / 16 + 700 + 225 + 25) / 50;
+		div = (frq * 4000 / 16 + 700000 + 225000 + 25000) / 50000;
 		break;
 	case TEA5767_LOW_LO_13MHz:
 		tuner_dbg ("TEA5767 radio LOW LO inject xtal @ 13 MHz\n");
 
 		buffer[4] |= TEA5767_PLLREF_ENABLE;
-		div = (frq * 4 / 16 - 700 - 225 + 25) / 50;
+		div = (frq * 4000 / 16 - 700000 - 225000 + 25000) / 50000;
 		break;
 	case TEA5767_LOW_LO_32768:
 		tuner_dbg ("TEA5767 radio LOW LO inject xtal @ 32,768 MHz\n");
 		buffer[3] |= TEA5767_XTAL_32768;
 		/* const 700=4000*175 Khz - to adjust freq to right value */
-		div = (1000 * (frq * 4 / 16 - 700 - 225) + 16384) >> 15;
+		div = ((frq * 4000 / 16 - 700000 - 225000) + 16384) >> 15;
 		break;
 	case TEA5767_HIGH_LO_32768:
 	default:
@@ -244,17 +244,21 @@ static void set_radio_freq(struct i2c_client *c, unsigned int frq)
 
 		buffer[2] |= TEA5767_HIGH_LO_INJECT;
 		buffer[3] |= TEA5767_XTAL_32768;
-		div = (1000 * (frq * 4 / 16 + 700 + 225) + 16384) >> 15;
+		div = ((frq * (4000 / 16) + 700000 + 225000) + 16384) >> 15;
 		break;
 	}
 	buffer[0] = (div >> 8) & 0x3f;
 	buffer[1] = div & 0xff;
 
-	if (tuner_debug)
-		tea5767_status_dump(buffer);
-
 	if (5 != (rc = i2c_master_send(c, buffer, 5)))
 		tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc);
+
+	if (tuner_debug) {
+		if (5 != (rc = i2c_master_recv(c, buffer, 5)))
+			tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc);
+		else
+			tea5767_status_dump(buffer);
+	}
 }
 
 static int tea5767_signal(struct i2c_client *c)
@@ -294,7 +298,7 @@ int tea5767_autodetection(struct i2c_client *c)
 	struct tuner *t = i2c_get_clientdata(c);
 
 	if (5 != (rc = i2c_master_recv(c, buffer, 5))) {
-		tuner_warn("it is not a TEA5767. Received %i chars.\n", rc);
+		tuner_warn("It is not a TEA5767. Received %i bytes.\n", rc);
 		return EINVAL;
 	}
 
@@ -310,11 +314,11 @@ int tea5767_autodetection(struct i2c_client *c)
 	 *          bit 0   : internally set to 0
 	 *  Byte 5: bit 7:0 : == 0
 	 */
-
 	if (!((buffer[3] & 0x0f) == 0x00) && (buffer[4] == 0x00)) {
 		tuner_warn("Chip ID is not zero. It is not a TEA5767\n");
 		return EINVAL;
 	}
+
 	tuner_warn("TEA5767 detected.\n");
 	return 0;
 }
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index de190630babb..b25a9c08ac02 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -1,5 +1,5 @@
 /*
- * $Id: tuner-core.c,v 1.55 2005/07/08 13:20:33 mchehab Exp $
+ * $Id: tuner-core.c,v 1.58 2005/07/14 03:06:43 mchehab Exp $
  *
  * i2c tv tuner chip device driver
  * core core, i.e. kernel interfaces, registering and so on
@@ -39,6 +39,9 @@ I2C_CLIENT_INSMOD;
 static unsigned int addr = 0;
 module_param(addr, int, 0444);
 
+static unsigned int no_autodetect = 0;
+module_param(no_autodetect, int, 0444);
+
 /* insmod options used at runtime => read/write */
 unsigned int tuner_debug = 0;
 module_param(tuner_debug, int, 0644);
@@ -318,17 +321,19 @@ static int tuner_attach(struct i2c_adapter *adap, int addr, int kind)
 	tuner_info("chip found @ 0x%x (%s)\n", addr << 1, adap->name);
 
 	/* TEA5767 autodetection code - only for addr = 0xc0 */
-	if (addr == 0x60) {
-		if (tea5767_autodetection(&t->i2c) != EINVAL) {
-			t->type = TUNER_TEA5767;
-			t->mode_mask = T_RADIO;
-			t->mode = T_STANDBY;
-			t->freq = 87.5 * 16; /* Sets freq to FM range */
-			default_mode_mask &= ~T_RADIO;
+	if (!no_autodetect) {
+		if (addr == 0x60) {
+			if (tea5767_autodetection(&t->i2c) != EINVAL) {
+				t->type = TUNER_TEA5767;
+				t->mode_mask = T_RADIO;
+				t->mode = T_STANDBY;
+				t->freq = 87.5 * 16; /* Sets freq to FM range */
+				default_mode_mask &= ~T_RADIO;
 
-			i2c_attach_client (&t->i2c);
-			set_type(&t->i2c,t->type, t->mode_mask);
-			return 0;
+				i2c_attach_client (&t->i2c);
+				set_type(&t->i2c,t->type, t->mode_mask);
+				return 0;
+			}
 		}
 	}
 
@@ -631,7 +636,9 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg)
 			break;
 		}
 	default:
-		tuner_dbg("Unimplemented IOCTL 0x%08x called to tuner.\n", cmd);
+		tuner_dbg("Unimplemented IOCTL 0x%08x(dir=%d,tp=0x%02x,nr=%d,sz=%d)\n",
+					 cmd, _IOC_DIR(cmd), _IOC_TYPE(cmd),
+					_IOC_NR(cmd), _IOC_SIZE(cmd));
 		break;
 	}
 

From afa597ba20e9ef55fc6283c1a564854b1c9f13c0 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Fri, 15 Jul 2005 03:56:30 -0700
Subject: [PATCH 099/113] [PATCH] execute-in-place fixes

This patch includes feedback from Andrew and Christoph. Thanks for
taking time to review.

Use of empty_zero_page was eliminated to fix compilation for architectures
that don't have it.

This patch removes setting pages up-to-date in ext2_get_xip_page and all
bug checks to verify that the page is indeed up to date.  Setting the page
state on mapping to userland is bogus.  None of the code patchs involved
with these pages in mm cares about the page state.

still on my ToDo list: identify a place outside second extended where
__inode_direct_access should reside

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/xip.c    | 81 ++++++++++++++++++++++++++++--------------------
 mm/filemap_xip.c | 23 +++++---------
 2 files changed, 55 insertions(+), 49 deletions(-)

diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index d44431d1a338..0aa5ac159c09 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -15,66 +15,79 @@
 #include "xip.h"
 
 static inline int
-__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
+__inode_direct_access(struct inode *inode, sector_t sector,
+		      unsigned long *data)
+{
 	BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
 	return inode->i_sb->s_bdev->bd_disk->fops
 		->direct_access(inode->i_sb->s_bdev,sector,data);
 }
 
+static inline int
+__ext2_get_sector(struct inode *inode, sector_t offset, int create,
+		   sector_t *result)
+{
+	struct buffer_head tmp;
+	int rc;
+
+	memset(&tmp, 0, sizeof(struct buffer_head));
+	rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp,
+			    create);
+	*result = tmp.b_blocknr;
+
+	/* did we get a sparse block (hole in the file)? */
+	if (!(*result)) {
+		BUG_ON(create);
+		rc = -ENODATA;
+	}
+
+	return rc;
+}
+
 int
-ext2_clear_xip_target(struct inode *inode, int block) {
-	sector_t sector = block*(PAGE_SIZE/512);
+ext2_clear_xip_target(struct inode *inode, int block)
+{
+	sector_t sector = block * (PAGE_SIZE/512);
 	unsigned long data;
 	int rc;
 
 	rc = __inode_direct_access(inode, sector, &data);
-	if (rc)
-		return rc;
-	clear_page((void*)data);
-	return 0;
+	if (!rc)
+		clear_page((void*)data);
+	return rc;
 }
 
 void ext2_xip_verify_sb(struct super_block *sb)
 {
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 
-	if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
-		if ((sb->s_bdev == NULL) ||
-			sb->s_bdev->bd_disk == NULL ||
-			sb->s_bdev->bd_disk->fops == NULL ||
-			sb->s_bdev->bd_disk->fops->direct_access == NULL) {
-			sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
-			ext2_warning(sb, __FUNCTION__,
-				"ignoring xip option - not supported by bdev");
-		}
+	if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
+	    !sb->s_bdev->bd_disk->fops->direct_access) {
+		sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
+		ext2_warning(sb, __FUNCTION__,
+			     "ignoring xip option - not supported by bdev");
 	}
 }
 
-struct page*
-ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
+struct page *
+ext2_get_xip_page(struct address_space *mapping, sector_t offset,
 		   int create)
 {
 	int rc;
 	unsigned long data;
-	struct buffer_head tmp;
+	sector_t sector;
 
-	tmp.b_state = 0;
-	tmp.b_blocknr = 0;
-	rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
-				create);
+	/* first, retrieve the sector number */
+	rc = __ext2_get_sector(mapping->host, offset, create, &sector);
 	if (rc)
-		return ERR_PTR(rc);
-	if (tmp.b_blocknr == 0) {
-		/* SPARSE block */
-		BUG_ON(create);
-		return ERR_PTR(-ENODATA);
-	}
+		goto error;
 
+	/* retrieve address of the target data */
 	rc = __inode_direct_access
-		(mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
-	if (rc)
-		return ERR_PTR(rc);
+		(mapping->host, sector * (PAGE_SIZE/512), &data);
+	if (!rc)
+		return virt_to_page(data);
 
-	SetPageUptodate(virt_to_page(data));
-	return virt_to_page(data);
+ error:
+	return ERR_PTR(rc);
 }
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 4553b2c5aab4..8c199f537732 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -68,13 +68,12 @@ do_xip_mapping_read(struct address_space *mapping,
 		if (unlikely(IS_ERR(page))) {
 			if (PTR_ERR(page) == -ENODATA) {
 				/* sparse */
-				page = virt_to_page(empty_zero_page);
+				page = ZERO_PAGE(0);
 			} else {
 				desc->error = PTR_ERR(page);
 				goto out;
 			}
-		} else
-			BUG_ON(!PageUptodate(page));
+		}
 
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
@@ -84,8 +83,7 @@ do_xip_mapping_read(struct address_space *mapping,
 			flush_dcache_page(page);
 
 		/*
-		 * Ok, we have the page, and it's up-to-date, so
-		 * now we can copy it to user space...
+		 * Ok, we have the page, so now we can copy it to user space...
 		 *
 		 * The actor routine returns how many bytes were actually used..
 		 * NOTE! This may not be the same as how much of a user buffer
@@ -164,7 +162,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
  * xip_write
  *
  * This function walks all vmas of the address_space and unmaps the
- * empty_zero_page when found at pgoff. Should it go in rmap.c?
+ * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
  */
 static void
 __xip_unmap (struct address_space * mapping,
@@ -187,7 +185,7 @@ __xip_unmap (struct address_space * mapping,
 		 * We need the page_table_lock to protect us from page faults,
 		 * munmap, fork, etc...
 		 */
-		pte = page_check_address(virt_to_page(empty_zero_page), mm,
+		pte = page_check_address(ZERO_PAGE(address), mm,
 					 address);
 		if (!IS_ERR(pte)) {
 			/* Nuke the page table entry. */
@@ -230,7 +228,6 @@ xip_file_nopage(struct vm_area_struct * area,
 
 	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
 	if (!IS_ERR(page)) {
-		BUG_ON(!PageUptodate(page));
 		return page;
 	}
 	if (PTR_ERR(page) != -ENODATA)
@@ -245,12 +242,11 @@ xip_file_nopage(struct vm_area_struct * area,
 			pgoff*(PAGE_SIZE/512), 1);
 		if (IS_ERR(page))
 			return NULL;
-		BUG_ON(!PageUptodate(page));
 		/* unmap page at pgoff from all other vmas */
 		__xip_unmap(mapping, pgoff);
 	} else {
-		/* not shared and writable, use empty_zero_page */
-		page = virt_to_page(empty_zero_page);
+		/* not shared and writable, use ZERO_PAGE() */
+		page = ZERO_PAGE(address);
 	}
 
 	return page;
@@ -319,8 +315,6 @@ __xip_file_write(struct file *filp, const char __user *buf,
 			break;
 		}
 
-		BUG_ON(!PageUptodate(page));
-
 		copied = filemap_copy_from_user(page, offset, buf, bytes);
 		flush_dcache_page(page);
 		if (likely(copied > 0)) {
@@ -435,8 +429,7 @@ xip_truncate_page(struct address_space *mapping, loff_t from)
 			return 0;
 		else
 			return PTR_ERR(page);
-	} else
-		BUG_ON(!PageUptodate(page));
+	}
 	kaddr = kmap_atomic(page, KM_USER0);
 	memset(kaddr + offset, 0, length);
 	kunmap_atomic(kaddr, KM_USER0);

From 35e422c967d3208f188e3096c9f603ac7333fb1b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 15 Jul 2005 03:56:32 -0700
Subject: [PATCH 100/113] [PATCH] visws: reexport pm_power_off

More fallout from the i386_ksyms.c cleanups.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/mach-visws/reboot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c
index 3a81e904a7b8..95e4676594e1 100644
--- a/arch/i386/mach-visws/reboot.c
+++ b/arch/i386/mach-visws/reboot.c
@@ -7,6 +7,7 @@
 #include "piix4.h"
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 void machine_restart(char * __unused)
 {

From 6f97933d0fd13920d7d53b6e0107bb674b3a1f0b Mon Sep 17 00:00:00 2001
From: Robert Love <rml@novell.com>
Date: Fri, 15 Jul 2005 03:56:33 -0700
Subject: [PATCH 101/113] [PATCH] inotify: documentation update

Clean up and expand some of the inotify documentation.

Signed-off-by: Robert Love <rml@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/filesystems/inotify.txt | 77 ++++++++++++++++-----------
 1 file changed, 45 insertions(+), 32 deletions(-)

diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt
index 2c716041f578..6d501903f68e 100644
--- a/Documentation/filesystems/inotify.txt
+++ b/Documentation/filesystems/inotify.txt
@@ -1,18 +1,22 @@
-				    inotify
-	     a powerful yet simple file change notification system
+				   inotify
+	    a powerful yet simple file change notification system
 
 
 Document started 15 Mar 2005 by Robert Love <rml@novell.com>
 
+
 (i) User Interface
 
-Inotify is controlled by a set of three sys calls 
+Inotify is controlled by a set of three system calls and normal file I/O on a
+returned file descriptor.
 
-First step in using inotify is to initialise an inotify instance
+First step in using inotify is to initialise an inotify instance:
 
 	int fd = inotify_init ();
 
+Each instance is associated with a unique, ordered queue.
+
 Change events are managed by "watches".  A watch is an (object,mask) pair where
 the object is a file or directory and the mask is a bit mask of one or more
 inotify events that the application wishes to receive.  See <linux/inotify.h>
@@ -22,43 +26,52 @@ Watches are added via a path to the file.
 
 Watches on a directory will return events on any files inside of the directory.
 
-Adding a watch is simple,
+Adding a watch is simple:
 
 	int wd = inotify_add_watch (fd, path, mask);
 
-You can add a large number of files via something like
-
-	for each file to watch {
-		int wd = inotify_add_watch (fd, file, mask);
-	}
+Where "fd" is the return value from inotify_init(), path is the path to the
+object to watch, and mask is the watch mask (see <linux/inotify.h>).
 
 You can update an existing watch in the same manner, by passing in a new mask.
 
-An existing watch is removed via the INOTIFY_IGNORE ioctl, for example
+An existing watch is removed via
 
-	inotify_rm_watch (fd, wd);
+	int ret = inotify_rm_watch (fd, wd);
 
 Events are provided in the form of an inotify_event structure that is read(2)
-from a inotify instance fd.  The filename is of dynamic length and follows the 
-struct. It is of size len.  The filename is padded with null bytes to ensure 
-proper alignment.  This padding is reflected in len.
+from a given inotify instance.  The filename is of dynamic length and follows
+the struct. It is of size len.  The filename is padded with null bytes to
+ensure proper alignment.  This padding is reflected in len.
 
 You can slurp multiple events by passing a large buffer, for example
 
 	size_t len = read (fd, buf, BUF_LEN);
 
-Will return as many events as are available and fit in BUF_LEN.
+Where "buf" is a pointer to an array of "inotify_event" structures at least
+BUF_LEN bytes in size.  The above example will return as many events as are
+available and fit in BUF_LEN.
 
-each inotify instance fd is also select()- and poll()-able.
+Each inotify instance fd is also select()- and poll()-able.
 
-You can find the size of the current event queue via the FIONREAD ioctl.
+You can find the size of the current event queue via the standard FIONREAD
+ioctl on the fd returned by inotify_init().
 
 All watches are destroyed and cleaned up on close.
 
 
-(ii) Internal Kernel Implementation
+(ii)
 
-Each open inotify instance is associated with an inotify_device structure.
+Prototypes:
+
+	int inotify_init (void);
+	int inotify_add_watch (int fd, const char *path, __u32 mask);
+	int inotify_rm_watch (int fd, __u32 mask);
+
+
+(iii) Internal Kernel Implementation
+
+Each inotify instance is associated with an inotify_device structure.
 
 Each watch is associated with an inotify_watch structure.  Watches are chained
 off of each associated device and each associated inode.
@@ -66,7 +79,7 @@ off of each associated device and each associated inode.
 See fs/inotify.c for the locking and lifetime rules.
 
 
-(iii) Rationale
+(iv) Rationale
 
 Q: What is the design decision behind not tying the watch to the open fd of
    the watched object?
@@ -75,9 +88,9 @@ A: Watches are associated with an open inotify device, not an open file.
    This solves the primary problem with dnotify: keeping the file open pins
    the file and thus, worse, pins the mount.  Dnotify is therefore infeasible
    for use on a desktop system with removable media as the media cannot be
-   unmounted.
+   unmounted.  Watching a file should not require that it be open.
 
-Q: What is the design decision behind using an-fd-per-device as opposed to
+Q: What is the design decision behind using an-fd-per-instance as opposed to
    an fd-per-watch?
 
 A: An fd-per-watch quickly consumes more file descriptors than are allowed,
@@ -86,8 +99,8 @@ A: An fd-per-watch quickly consumes more file descriptors than are allowed,
    can use epoll, but requiring both is a silly and extraneous requirement.
    A watch consumes less memory than an open file, separating the number
    spaces is thus sensible.  The current design is what user-space developers
-   want: Users initialize inotify, once, and add n watches, requiring but one fd
-   and no twiddling with fd limits.  Initializing an inotify instance two
+   want: Users initialize inotify, once, and add n watches, requiring but one
+   fd and no twiddling with fd limits.  Initializing an inotify instance two
    thousand times is silly.  If we can implement user-space's preferences 
    cleanly--and we can, the idr layer makes stuff like this trivial--then we 
    should.
@@ -111,9 +124,6 @@ A: An fd-per-watch quickly consumes more file descriptors than are allowed,
      example, love it.  Trust me, I asked.  It is not a surprise: Who'd want
      to manage and block on 1000 fd's via select?
 
-   - You'd have to manage the fd's, as an example: Call close() when you
-     received a delete event.
-
    - No way to get out of band data.
 
    - 1024 is still too low.  ;-)
@@ -122,6 +132,11 @@ A: An fd-per-watch quickly consumes more file descriptors than are allowed,
    scales to 1000s of directories, juggling 1000s of fd's just does not seem
    the right interface.  It is too heavy.
 
+   Additionally, it _is_ possible to  more than one instance  and
+   juggle more than one queue and thus more than one associated fd.  There
+   need not be a one-fd-per-process mapping; it is one-fd-per-queue and a
+   process can easily want more than one queue.
+
 Q: Why the system call approach?
 
 A: The poor user-space interface is the second biggest problem with dnotify.
@@ -131,8 +146,6 @@ A: The poor user-space interface is the second biggest problem with dnotify.
    Obtaining the fd and managing the watches could have been done either via a
    device file or a family of new system calls.  We decided to implement a
    family of system calls because that is the preffered approach for new kernel
-   features and it means our user interface requirements.
-
-   Additionally, it _is_ possible to  more than one instance  and
-   juggle more than one queue and thus more than one associated fd.
+   interfaces.  The only real difference was whether we wanted to use open(2)
+   and ioctl(2) or a couple of new system calls.  System calls beat ioctls.
 

From a1287ba1ba810aae1f8b81e32560d5d3bf3ff9f0 Mon Sep 17 00:00:00 2001
From: Michal Ostrowski <mostrows@watson.ibm.com>
Date: Fri, 15 Jul 2005 03:56:33 -0700
Subject: [PATCH 102/113] [PATCH] rocket.c: Fix ldisc ref count handling

If bailing out because there is nothing to receive in rp_do_receive(),
tty_ldisc_deref is not called.  Failure to do so increases the ref count
and causes release_dev() to hang since it can't get the ref count to 0.

Signed-off-by: Michal Ostrowski <mostrows@watson.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/rocket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f463d6baa685..5b1d3680c8ab 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -355,7 +355,7 @@ static void rp_do_receive(struct r_port *info,
 		ToRecv = space;
 
 	if (ToRecv <= 0)
-		return;
+		goto done;
 
 	/*
 	 * if status indicates there are errored characters in the
@@ -437,6 +437,7 @@ static void rp_do_receive(struct r_port *info,
 	}
 	/*  Push the data up to the tty layer */
 	ld->receive_buf(tty, tty->flip.char_buf, tty->flip.flag_buf, count);
+done:
 	tty_ldisc_deref(ld);
 }
 

From 6a806c510de490318846b53bbfec463d02ca274b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Fri, 15 Jul 2005 03:56:35 -0700
Subject: [PATCH 103/113] [PATCH] md/raid1: clear bitmap when fullsync
 completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/bitmap.c         |  9 ++++++---
 drivers/md/raid1.c          | 37 ++++++++++++++++++-------------------
 include/linux/raid/bitmap.h |  2 +-
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 95980ad6b27b..0c2ed99a3832 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1345,7 +1345,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 	}
 }
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+			int degraded)
 {
 	bitmap_counter_t *bmc;
 	int rv;
@@ -1362,8 +1363,10 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
 			rv = 1;
 		else if (NEEDED(*bmc)) {
 			rv = 1;
-			*bmc |= RESYNC_MASK;
-			*bmc &= ~NEEDED_MASK;
+			if (!degraded) { /* don't set/clear bits if degraded */
+				*bmc |= RESYNC_MASK;
+				*bmc &= ~NEEDED_MASK;
+			}
 		}
 	}
 	spin_unlock_irq(&bitmap->lock);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff1dbec864af..5f253ee536bb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1126,21 +1126,19 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		 * only be one in raid1 resync.
 		 * We can find the current addess in mddev->curr_resync
 		 */
-		if (!conf->fullsync) {
-			if (mddev->curr_resync < max_sector)
-				bitmap_end_sync(mddev->bitmap,
-						mddev->curr_resync,
+		if (mddev->curr_resync < max_sector) /* aborted */
+			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
 						&sync_blocks, 1);
-			bitmap_close_sync(mddev->bitmap);
-		}
-		if (mddev->curr_resync >= max_sector)
+		else /* completed sync */
 			conf->fullsync = 0;
+
+		bitmap_close_sync(mddev->bitmap);
 		close_sync(conf);
 		return 0;
 	}
 
-	if (!conf->fullsync &&
-	    !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
+	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) &&
+	    !conf->fullsync) {
 		/* We can skip this block, and probably several more */
 		*skipped = 1;
 		return sync_blocks;
@@ -1243,15 +1241,15 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
 			break;
-		if (!conf->fullsync) {
-			if (sync_blocks == 0) {
-				if (!bitmap_start_sync(mddev->bitmap,
-						       sector_nr, &sync_blocks))
-					break;
-				if (sync_blocks < (PAGE_SIZE>>9))
-					BUG();
-				if (len > (sync_blocks<<9)) len = sync_blocks<<9;
-			}
+		if (sync_blocks == 0) {
+			if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+					&sync_blocks, mddev->degraded) &&
+					!conf->fullsync)
+				break;
+			if (sync_blocks < (PAGE_SIZE>>9))
+				BUG();
+			if (len > (sync_blocks<<9))
+				len = sync_blocks<<9;
 		}
 
 		for (i=0 ; i < conf->raid_disks; i++) {
@@ -1264,7 +1262,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 					while (i > 0) {
 						i--;
 						bio = r1_bio->bios[i];
-						if (bio->bi_end_io==NULL) continue;
+						if (bio->bi_end_io==NULL)
+							continue;
 						/* remove last page from this bio */
 						bio->bi_vcnt--;
 						bio->bi_size -= len;
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index e24b74b11150..6213e976eade 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -262,7 +262,7 @@ void bitmap_write_all(struct bitmap *bitmap);
 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
 		     int success);
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded);
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
 

From c9f04f5847c17d9933c5bd795c6c3740e96058e9 Mon Sep 17 00:00:00 2001
From: Robert Love <rml@novell.com>
Date: Fri, 15 Jul 2005 12:21:07 -0400
Subject: [PATCH 104/113] [PATCH] inotify: MAINTAINERS

Add a MAINTAINERS entry for Inotify.

Signed-off-by: Robert Love <rml@novell.com>
Signed-off-by: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f73381af4d5e..48aa6d3d3ce9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1169,6 +1169,12 @@ L:	linux-input@atrey.karlin.mff.cuni.cz
 L:	linux-joystick@atrey.karlin.mff.cuni.cz
 S:	Maintained
 
+INOTIFY
+P:	John McCutchan and Robert Love
+M:	ttb@tentacle.dhs.org and rml@novell.com
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 INTEL 810/815 FRAMEBUFFER DRIVER
 P:      Antonino Daplas
 M:      adaplas@pol.net

From 38d84c3bd6dd22bdb1f797c87006931133d71aea Mon Sep 17 00:00:00 2001
From: Patrick Boettcher <patrick.boettcher@desy.de>
Date: Fri, 15 Jul 2005 12:20:26 -0700
Subject: [PATCH 105/113] [PATCH] dvb: cx88 output mode fix

The output_mode needs to be set for the Hauppauge Nova-T DVB-T
(cx88-dvb.c).

Cc: Johannes Stezenbach <js@linuxtv.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/video/cx88/cx88-dvb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index 00ca40a129b9..6ad1458ab652 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -180,12 +180,14 @@ static struct mt352_config dntv_live_dvbt_config = {
 #if CONFIG_DVB_CX22702
 static struct cx22702_config connexant_refboard_config = {
 	.demod_address = 0x43,
+	.output_mode   = CX22702_SERIAL_OUTPUT,
 	.pll_address   = 0x60,
 	.pll_desc      = &dvb_pll_thomson_dtt7579,
 };
 
 static struct cx22702_config hauppauge_novat_config = {
 	.demod_address = 0x43,
+	.output_mode   = CX22702_SERIAL_OUTPUT,
 	.pll_address   = 0x61,
 	.pll_desc      = &dvb_pll_thomson_dtt759x,
 };

From 9fb1759a3102c26cd8f64254a7c3e532782c2bb8 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Fri, 15 Jul 2005 19:17:44 -0700
Subject: [PATCH 106/113] [PATCH] x86_64: TASK_SIZE fixes for compatibility
 mode processes

A malicious 32bit app can have an elf section at 0xffffe000.  During
exec of this app, we will have a memory leak as insert_vm_struct() is
not checking for return value in syscall32_setup_pages() and thus not
freeing the vma allocated for the vsyscall page.

Check the return value and free the vma incase of failure.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/ia32/syscall32.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 01d8db1a1c09..816a3b89f13d 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -57,6 +57,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
+	int ret;
 
 	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 	if (!vma)
@@ -78,7 +79,11 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 	vma->vm_mm = mm;
 
 	down_write(&mm->mmap_sem);
-	insert_vm_struct(mm, vma);
+	if ((ret = insert_vm_struct(mm, vma))) {
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, vma);
+		return ret;
+	}
 	mm->total_vm += npages;
 	up_write(&mm->mmap_sem);
 	return 0;

From 19870da7ea2fc483bf73a189046a430fd9b01391 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 15 Jul 2005 14:53:51 +0100
Subject: [PATCH 107/113] [MTD] NAND: Fix broken bad block scan for 16 bit
 devices

The previous change to read a single byte from oob breaks the
bad block scan on 16 bit devices, when the byte is on an odd
address. Read the complete oob for now.
Remove the unused arguments from check_short_pattern()
Move the wait for ready function so it is only executed when
consecutive reads happen.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/mtd/nand/nand_base.c | 22 +++++++++++-----------
 drivers/mtd/nand/nand_bbt.c  | 20 +++++++++-----------
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 1bd71a598c79..eee5115658c8 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -59,7 +59,7 @@
  *	The AG-AND chips have nice features for speed improvement,
  *	which are not supported yet. Read / program 4 pages in one go.
  *
- * $Id: nand_base.c,v 1.146 2005/06/17 15:02:06 gleixner Exp $
+ * $Id: nand_base.c,v 1.147 2005/07/15 07:18:06 gleixner Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1409,16 +1409,6 @@ static int nand_read_oob (struct mtd_info *mtd, loff_t from, size_t len, size_t
 		thislen = min_t(int, thislen, len);
 		this->read_buf(mtd, &buf[i], thislen);
 		i += thislen;
-		
-		/* Apply delay or wait for ready/busy pin 
-		 * Do this before the AUTOINCR check, so no problems
-		 * arise if a chip which does auto increment
-		 * is marked as NOAUTOINCR by the board driver.
-		*/
-		if (!this->dev_ready) 
-			udelay (this->chip_delay);
-		else
-			nand_wait_ready(mtd);
 
 		/* Read more ? */
 		if (i < len) {
@@ -1432,6 +1422,16 @@ static int nand_read_oob (struct mtd_info *mtd, loff_t from, size_t len, size_t
 				this->select_chip(mtd, chipnr);
 			}
 				
+			/* Apply delay or wait for ready/busy pin 
+			 * Do this before the AUTOINCR check, so no problems
+			 * arise if a chip which does auto increment
+			 * is marked as NOAUTOINCR by the board driver.
+			 */
+			if (!this->dev_ready) 
+				udelay (this->chip_delay);
+			else
+				nand_wait_ready(mtd);
+
 			/* Check, if the chip supports auto page increment 
 			 * or if we have hit a block boundary. 
 			*/ 
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 5ac2d2962220..7535ef53685e 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -6,7 +6,7 @@
  *   
  *  Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: nand_bbt.c,v 1.33 2005/06/14 15:47:56 gleixner Exp $
+ * $Id: nand_bbt.c,v 1.35 2005/07/15 13:53:47 gleixner Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -109,24 +109,21 @@ static int check_pattern (uint8_t *buf, int len, int paglen, struct nand_bbt_des
 /** 
  * check_short_pattern - [GENERIC] check if a pattern is in the buffer
  * @buf:	the buffer to search
- * @len:	the length of buffer to search
- * @paglen:	the pagelength
  * @td:		search pattern descriptor
  *
  * Check for a pattern at the given place. Used to search bad block
  * tables and good / bad block identifiers. Same as check_pattern, but 
- * no optional empty check and the pattern is expected to start
- * at offset 0.
+ * no optional empty check
  *
 */
-static int check_short_pattern (uint8_t *buf, int len, int paglen, struct nand_bbt_descr *td)
+static int check_short_pattern (uint8_t *buf, struct nand_bbt_descr *td)
 {
 	int i;
 	uint8_t *p = buf;
 
 	/* Compare the pattern */
 	for (i = 0; i < td->len; i++) {
-		if (p[i] != td->pattern[i])
+		if (p[td->offs + i] != td->pattern[i])
 			return -1;
 	}
 	return 0;
@@ -337,13 +334,14 @@ static int create_bbt (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
 			if (!(bd->options & NAND_BBT_SCANEMPTY)) {
 				size_t retlen;
 				
-				/* No need to read pages fully, just read required OOB bytes */
-				ret = mtd->read_oob(mtd, from + j * mtd->oobblock + bd->offs,
-							readlen, &retlen, &buf[0]);
+				/* Read the full oob until read_oob is fixed to 
+				 * handle single byte reads for 16 bit buswidth */
+				ret = mtd->read_oob(mtd, from + j * mtd->oobblock,
+							mtd->oobsize, &retlen, buf);
 				if (ret)
 					return ret;
 
-				if (check_short_pattern (&buf[j * scanlen], scanlen, mtd->oobblock, bd)) {
+				if (check_short_pattern (buf, bd)) {
 					this->bbt[i >> 3] |= 0x03 << (i & 0x6);
 					printk (KERN_WARNING "Bad eraseblock %d at 0x%08x\n", 
 						i >> 1, (unsigned int) from);

From 661f83a67c2e360d5a4d2406cc28379c909f94bf Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 Jul 2005 09:30:53 +0100
Subject: [PATCH 108/113] [PATCH] Serial: Move deprecation of register_serial
 forward to September

I think it's about time to make the build a little more vocal about the
expiry of these functions.  Due to recent discussions with problems in
the console initialisation vs power manglement, I'd like to move the
date forward to September.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/feature-removal-schedule.txt | 4 ++--
 include/linux/serial.h                     | 6 ++++--
 include/linux/serial_core.h                | 5 +++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 12dde43fe657..8b1430b46655 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -103,11 +103,11 @@ Who:	Jody McIntyre <scjody@steamballoon.com>
 ---------------------------
 
 What:	register_serial/unregister_serial
-When:	December 2005
+When:	September 2005
 Why:	This interface does not allow serial ports to be registered against
 	a struct device, and as such does not allow correct power management
 	of such ports.  8250-based ports should use serial8250_register_port
-	and serial8250_unregister_port instead.
+	and serial8250_unregister_port, or platform devices instead.
 Who:	Russell King <rmk@arm.linux.org.uk>
 
 ---------------------------
diff --git a/include/linux/serial.h b/include/linux/serial.h
index 00145822fb74..9f2d85284d0b 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -174,9 +174,11 @@ struct serial_icounter_struct {
 
 
 #ifdef __KERNEL__
+#include <linux/compiler.h>
+
 /* Export to allow PCMCIA to use this - Dave Hinds */
-extern int register_serial(struct serial_struct *req);
-extern void unregister_serial(int line);
+extern int __deprecated register_serial(struct serial_struct *req);
+extern void __deprecated unregister_serial(int line);
 
 /* Allow architectures to override entries in serial8250_ports[] at run time: */
 struct uart_port;	/* forward declaration */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d6025af7efac..30b64f3534f4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -122,6 +122,7 @@
 #ifdef __KERNEL__
 
 #include <linux/config.h>
+#include <linux/compiler.h>
 #include <linux/interrupt.h>
 #include <linux/circ_buf.h>
 #include <linux/spinlock.h>
@@ -359,8 +360,8 @@ struct tty_driver *uart_console_device(struct console *co, int *index);
  */
 int uart_register_driver(struct uart_driver *uart);
 void uart_unregister_driver(struct uart_driver *uart);
-void uart_unregister_port(struct uart_driver *reg, int line);
-int uart_register_port(struct uart_driver *reg, struct uart_port *port);
+void __deprecated uart_unregister_port(struct uart_driver *reg, int line);
+int __deprecated uart_register_port(struct uart_driver *reg, struct uart_port *port);
 int uart_add_one_port(struct uart_driver *reg, struct uart_port *port);
 int uart_remove_one_port(struct uart_driver *reg, struct uart_port *port);
 int uart_match_port(struct uart_port *port1, struct uart_port *port2);

From 6d283d271674b1127881ebf082266a2c3fe6e0e4 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Sat, 16 Jul 2005 09:59:00 +0100
Subject: [PATCH 109/113] [PATCH] Serial: Remove linux/version.h

changing CONFIG_LOCALVERSION rebuilds too much, for no appearent reason.

Signed-off-by: Olaf Hering <olh@suse.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/crisv10.c | 2 --
 drivers/serial/icom.c    | 1 -
 drivers/serial/jsm/jsm.h | 1 -
 include/linux/serialP.h  | 1 -
 4 files changed, 5 deletions(-)

diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 3da5494953af..23b8871e74cc 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -426,8 +426,6 @@
 static char *serial_version = "$Revision: 1.25 $";
 
 #include <linux/config.h>
-#include <linux/version.h>
-
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index 546a0bc77e1e..c112b32764e8 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -25,7 +25,6 @@
 #define SERIAL_DO_RESTART
 #include <linux/module.h>
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h
index 777829fa3300..5bf3c45521f4 100644
--- a/drivers/serial/jsm/jsm.h
+++ b/drivers/serial/jsm/jsm.h
@@ -28,7 +28,6 @@
 #define __JSM_DRIVER_H
 
 #include <linux/kernel.h>
-#include <linux/version.h>
 #include <linux/types.h>	/* To pick up the varions Linux types */
 #include <linux/tty.h>
 #include <linux/serial_core.h>
diff --git a/include/linux/serialP.h b/include/linux/serialP.h
index 2307f11d8a6b..2b2f35a64d75 100644
--- a/include/linux/serialP.h
+++ b/include/linux/serialP.h
@@ -19,7 +19,6 @@
  * For definitions of the flags field, see tty.h
  */
 
-#include <linux/version.h>
 #include <linux/config.h>
 #include <linux/termios.h>
 #include <linux/workqueue.h>

From 54ea06f6afe85aaf419e51343d4e4b5599197113 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 Jul 2005 15:21:51 +0100
Subject: [PATCH 110/113] [PATCH] ARM: Convert bitops to use ARMv6 ldrex/strex
 instructions

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/lib/bitops.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 4a83ab6cd565..6976e60e47cb 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,3 +1,33 @@
+#if __LINUX_ARM_ARCH__ >= 6
+	.macro	bitop, instr
+	mov	r2, #1
+	and	r3, r0, #7		@ Get bit offset
+	add	r1, r1, r0, lsr #3	@ Get byte offset
+	mov	r3, r2, lsl r3
+1:	ldrexb	r2, [r1]
+	\instr	r2, r2, r3
+	strexb	r0, r2, [r1]
+	cmpne	r0, #0
+	bne	1b
+	mov	pc, lr
+	.endm
+
+	.macro	testop, instr, store
+	and	r3, r0, #7		@ Get bit offset
+	mov	r2, #1
+	add	r1, r1, r0, lsr #3	@ Get byte offset
+	mov	r3, r2, lsl r3		@ create mask
+1:	ldrexb	r2, [r1]
+	ands	r0, r2, r3		@ save old value of bit
+	\instr	ip, r2, r3			@ toggle bit
+	strexb	r2, ip, [r1]
+	cmp	r2, #0
+	bne	1b
+	cmp	r0, #0
+	movne	r0, #1
+2:	mov	pc, lr
+	.endm
+#else
 	.macro	bitop, instr
 	and	r2, r0, #7
 	mov	r3, #1
@@ -31,3 +61,4 @@
 	moveq	r0, #0
 	mov	pc, lr
 	.endm
+#endif

From 109d89ca0ee76138e04b41726b7f719c7df65e88 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sat, 16 Jul 2005 16:43:33 +0100
Subject: [PATCH 111/113] [PATCH] ARM: Allow register_undef_hook to be called
 with IRQs off

Preserve the interrupt status across a call to register_undef_hook.
This allows it to be called while interrupts are disabled.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/traps.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index df2cb06ce424..d571c37ac30c 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -248,16 +248,20 @@ static DEFINE_SPINLOCK(undef_lock);
 
 void register_undef_hook(struct undef_hook *hook)
 {
-	spin_lock_irq(&undef_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&undef_lock, flags);
 	list_add(&hook->node, &undef_hook);
-	spin_unlock_irq(&undef_lock);
+	spin_unlock_irqrestore(&undef_lock, flags);
 }
 
 void unregister_undef_hook(struct undef_hook *hook)
 {
-	spin_lock_irq(&undef_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&undef_lock, flags);
 	list_del(&hook->node);
-	spin_unlock_irq(&undef_lock);
+	spin_unlock_irqrestore(&undef_lock, flags);
 }
 
 asmlinkage void do_undefinstr(struct pt_regs *regs)

From 878cf4e1c7be6bffde3ace888a65ac3d43c127bb Mon Sep 17 00:00:00 2001
From: Michael Burian <dynmail1@gassner-waagen.at>
Date: Sat, 16 Jul 2005 16:43:49 +0100
Subject: [PATCH 112/113] [PATCH] ARM: 2794/1: Add "Image" and "mach-types.h"
 to dontdiff list

Patch from Michael Burian

comment in "mach-types.h" tells that it should not be patched
"Image" is a binary, just as zImage, uImage and friends are

Signed-off-by: Michael Burian <dynmail1@gassner-waagen.at>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/dontdiff | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index d4fda25db868..b974cf595d01 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -41,6 +41,7 @@ COPYING
 CREDITS
 CVS
 ChangeSet
+Image
 Kerntypes
 MODS.txt
 Module.symvers
@@ -103,6 +104,7 @@ logo_*.c
 logo_*_clut224.c
 logo_*_mono.c
 lxdialog
+mach-types.h
 make_times_h
 map
 maui_boot.h

From b7523418f6af9093c462341c08c2233f44f7a28d Mon Sep 17 00:00:00 2001
From: Alexander Schulz <alex@shark-linux.de>
Date: Sat, 16 Jul 2005 17:17:18 +0100
Subject: [PATCH 113/113] [PATCH] ARM: 2815/1: Shark: new defconfig, fixes with
 __io and serial ports

Patch from Alexander Schulz

This patch brings a new default config file for the shark and
fixes a compilation issue with io addressing and a runtime
problem with the serial ports, where I corrected a wrong
regshift value.
These are all shark specific files so I hope it is ok to
put them in one patch.

Signed-off-by: Alexander Schulz <alex@shark-linux.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/configs/shark_defconfig |  85 +++++++++++-------
 arch/arm/lib/io-shark.c          |  70 ---------------
 arch/arm/mach-shark/core.c       |   4 +-
 include/asm-arm/arch-shark/io.h  | 147 +------------------------------
 4 files changed, 55 insertions(+), 251 deletions(-)

diff --git a/arch/arm/configs/shark_defconfig b/arch/arm/configs/shark_defconfig
index 1d9bcbbc8dfc..271823f0d708 100644
--- a/arch/arm/configs/shark_defconfig
+++ b/arch/arm/configs/shark_defconfig
@@ -1,22 +1,21 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc1-bk2
-# Sun Mar 27 23:59:14 2005
+# Linux kernel version: 2.6.12-git3
+# Sat Jul 16 15:21:47 2005
 #
 CONFIG_ARM=y
 CONFIG_MMU=y
 CONFIG_UID16=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_IOMAP=y
 
 #
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
-# CONFIG_CLEAN_COMPILE is not set
-CONFIG_BROKEN=y
+CONFIG_CLEAN_COMPILE=y
 CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
@@ -33,7 +32,10 @@ CONFIG_KOBJECT_UEVENT=y
 # CONFIG_IKCONFIG is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
@@ -81,6 +83,7 @@ CONFIG_ARCH_SHARK=y
 # CONFIG_ARCH_VERSATILE is not set
 # CONFIG_ARCH_IMX is not set
 # CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_AAEC2000 is not set
 
 #
 # Processor Type
@@ -103,10 +106,12 @@ CONFIG_CPU_TLB_V4WB=y
 #
 CONFIG_ISA=y
 CONFIG_ISA_DMA=y
+CONFIG_ISA_DMA_API=y
 CONFIG_PCI=y
 CONFIG_PCI_HOST_VIA82C505=y
 CONFIG_PCI_LEGACY_PROC=y
 # CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
 
 #
 # PCCARD (PCMCIA/CardBus) support
@@ -116,7 +121,9 @@ CONFIG_PCI_LEGACY_PROC=y
 #
 # Kernel Features
 #
+# CONFIG_SMP is not set
 # CONFIG_PREEMPT is not set
+# CONFIG_DISCONTIGMEM is not set
 CONFIG_LEDS=y
 CONFIG_LEDS_TIMER=y
 # CONFIG_LEDS_CPU is not set
@@ -163,6 +170,7 @@ CONFIG_BINFMT_ELF=y
 # CONFIG_STANDALONE is not set
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
 
 #
 # Memory Technology Devices (MTD)
@@ -172,8 +180,8 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 #
 # Parallel port support
 #
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
 # CONFIG_PARPORT_SERIAL is not set
 # CONFIG_PARPORT_PC_FIFO is not set
 # CONFIG_PARPORT_PC_SUPERIO is not set
@@ -189,7 +197,6 @@ CONFIG_PARPORT_PC=y
 #
 # Block devices
 #
-# CONFIG_BLK_DEV_FD is not set
 # CONFIG_BLK_DEV_XD is not set
 # CONFIG_PARIDE is not set
 # CONFIG_BLK_CPQ_DA is not set
@@ -229,7 +236,7 @@ CONFIG_BLK_DEV_IDE=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
 CONFIG_BLK_DEV_IDEDISK=y
 # CONFIG_IDEDISK_MULTI_MODE is not set
-CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_IDECD=m
 # CONFIG_BLK_DEV_IDETAPE is not set
 CONFIG_BLK_DEV_IDEFLOPPY=y
 # CONFIG_BLK_DEV_IDESCSI is not set
@@ -261,6 +268,7 @@ CONFIG_CHR_DEV_ST=m
 CONFIG_BLK_DEV_SR=m
 # CONFIG_BLK_DEV_SR_VENDOR is not set
 CONFIG_CHR_DEV_SG=m
+# CONFIG_CHR_DEV_SCH is not set
 
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
@@ -290,17 +298,14 @@ CONFIG_CHR_DEV_SG=m
 # CONFIG_SCSI_AIC7XXX_OLD is not set
 # CONFIG_SCSI_AIC79XX is not set
 # CONFIG_SCSI_DPT_I2O is not set
-# CONFIG_SCSI_ADVANSYS is not set
 # CONFIG_SCSI_IN2000 is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_SCSI_SATA is not set
 # CONFIG_SCSI_BUSLOGIC is not set
-# CONFIG_SCSI_CPQFCTS is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_DTC3280 is not set
 # CONFIG_SCSI_EATA is not set
-# CONFIG_SCSI_EATA_PIO is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_GDTH is not set
 # CONFIG_SCSI_GENERIC_NCR5380 is not set
@@ -314,11 +319,8 @@ CONFIG_CHR_DEV_SG=m
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
 # CONFIG_SCSI_PAS16 is not set
-# CONFIG_SCSI_PCI2000 is not set
-# CONFIG_SCSI_PCI2220I is not set
 # CONFIG_SCSI_PSI240I is not set
 # CONFIG_SCSI_QLOGIC_FAS is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 CONFIG_SCSI_QLA2XXX=m
@@ -327,6 +329,7 @@ CONFIG_SCSI_QLA2XXX=m
 # CONFIG_SCSI_QLA2300 is not set
 # CONFIG_SCSI_QLA2322 is not set
 # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_SYM53C416 is not set
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC390T is not set
@@ -344,6 +347,8 @@ CONFIG_SCSI_QLA2XXX=m
 # Fusion MPT device support
 #
 # CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
 
 #
 # IEEE 1394 (FireWire) support
@@ -365,7 +370,6 @@ CONFIG_NET=y
 #
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
@@ -380,7 +384,7 @@ CONFIG_INET=y
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_TUNNEL is not set
-# CONFIG_IP_TCPDIAG is not set
+CONFIG_IP_TCPDIAG=y
 # CONFIG_IP_TCPDIAG_IPV6 is not set
 # CONFIG_IPV6 is not set
 # CONFIG_NETFILTER is not set
@@ -439,6 +443,7 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_LANCE is not set
 # CONFIG_NET_VENDOR_SMC is not set
 # CONFIG_SMC91X is not set
+# CONFIG_DM9000 is not set
 # CONFIG_NET_VENDOR_RACAL is not set
 
 #
@@ -483,9 +488,11 @@ CONFIG_CS89x0=y
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_R8169 is not set
+# CONFIG_SKGE is not set
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
 # CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
 
 #
 # Ethernet (10000 Mbit)
@@ -569,7 +576,6 @@ CONFIG_SERIO_I8042=y
 CONFIG_SERIO_LIBPS2=y
 # CONFIG_SERIO_RAW is not set
 # CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
 
 #
 # Character devices
@@ -592,6 +598,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
@@ -653,6 +660,7 @@ CONFIG_FB_CFB_FILLRECT=y
 CONFIG_FB_CFB_COPYAREA=y
 CONFIG_FB_CFB_IMAGEBLIT=y
 CONFIG_FB_SOFT_CURSOR=y
+# CONFIG_FB_MACMODES is not set
 # CONFIG_FB_MODE_HELPERS is not set
 # CONFIG_FB_TILEBLITTING is not set
 # CONFIG_FB_CIRRUS is not set
@@ -674,7 +682,7 @@ CONFIG_FB_CYBER2000=y
 # CONFIG_FB_3DFX is not set
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_PM3 is not set
+# CONFIG_FB_S1D13XXX is not set
 # CONFIG_FB_VIRTUAL is not set
 
 #
@@ -808,7 +816,7 @@ CONFIG_DNOTIFY=y
 #
 # CD-ROM/DVD Filesystems
 #
-CONFIG_ISO9660_FS=y
+CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 # CONFIG_ZISOFS is not set
 # CONFIG_UDF_FS is not set
@@ -816,9 +824,9 @@ CONFIG_JOLIET=y
 #
 # DOS/FAT/NT Filesystems
 #
-CONFIG_FAT_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
@@ -833,7 +841,6 @@ CONFIG_DEVFS_MOUNT=y
 # CONFIG_DEVFS_DEBUG is not set
 # CONFIG_DEVPTS_FS_XATTR is not set
 # CONFIG_TMPFS is not set
-# CONFIG_HUGETLBFS is not set
 # CONFIG_HUGETLB_PAGE is not set
 CONFIG_RAMFS=y
 
@@ -857,13 +864,14 @@ CONFIG_RAMFS=y
 #
 # Network File Systems
 #
-CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
 # CONFIG_NFS_V4 is not set
 # CONFIG_NFS_DIRECTIO is not set
 # CONFIG_NFSD is not set
-CONFIG_LOCKD=y
-CONFIG_SUNRPC=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_SUNRPC=m
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -895,12 +903,12 @@ CONFIG_MSDOS_PARTITION=y
 #
 # Native Language Support
 #
-CONFIG_NLS=y
+CONFIG_NLS=m
 CONFIG_NLS_DEFAULT="iso8859-1"
-CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_437=m
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
-CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_850=m
 # CONFIG_NLS_CODEPAGE_852 is not set
 # CONFIG_NLS_CODEPAGE_855 is not set
 # CONFIG_NLS_CODEPAGE_857 is not set
@@ -921,7 +929,7 @@ CONFIG_NLS_CODEPAGE_850=y
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_1=m
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_4 is not set
@@ -945,11 +953,22 @@ CONFIG_NLS_ISO8859_1=y
 # Kernel hacking
 #
 # CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_MAGIC_SYSRQ is not set
 CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
 CONFIG_FRAME_POINTER=y
 CONFIG_DEBUG_USER=y
+# CONFIG_DEBUG_WAITQ is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_LL is not set
 
 #
 # Security options
diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c
index 108d4573e970..824253948f51 100644
--- a/arch/arm/lib/io-shark.c
+++ b/arch/arm/lib/io-shark.c
@@ -11,73 +11,3 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/kernel.h>
-
-#include <asm/io.h>
-
-void print_warning(void)
-{
-	printk(KERN_WARNING "ins?/outs? not implemented on this architecture\n");
-}
-
-void insl(unsigned int port, void *to, int len)
-{
-	print_warning();
-}
-
-void insb(unsigned int port, void *to, int len)
-{
-	print_warning();
-}
-
-void outsl(unsigned int port, const void *from, int len)
-{
-	print_warning();
-}
-
-void outsb(unsigned int port, const void *from, int len)
-{
-	print_warning();
-}
-
-/* these should be in assembler again */
-
-/*
- * Purpose: read a block of data from a hardware register to memory.
- * Proto  : insw(int from_port, void *to, int len_in_words);
- * Proto  : inswb(int from_port, void *to, int len_in_bytes);
- * Notes  : increment to
- */
-
-void insw(unsigned int port, void *to, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		((unsigned short *) to)[i] = inw(port);
-}
-
-void inswb(unsigned int port, void *to, int len)
-{
-	insw(port, to, len >> 2);
-}
-
-/*
- * Purpose: write a block of data from memory to a hardware register.
- * Proto  : outsw(int to_reg, void *from, int len_in_words);
- * Proto  : outswb(int to_reg, void *from, int len_in_bytes);
- * Notes  : increments from
- */
-
-void outsw(unsigned int port, const void *from, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		outw(((unsigned short *) from)[i], port);
-}
-
-void outswb(unsigned int port, const void *from, int len)
-{
-	outsw(port, from, len >> 2);
-}
diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c
index 726445895b5c..e737eae4521f 100644
--- a/arch/arm/mach-shark/core.c
+++ b/arch/arm/mach-shark/core.c
@@ -24,7 +24,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
 		.iobase		= 0x3f8,
 		.irq		= 4,
 		.uartclk	= 1843200,
-		.regshift	= 2,
+		.regshift	= 0,
 		.iotype		= UPIO_PORT,
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 	},
@@ -32,7 +32,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
 		.iobase		= 0x2f8,
 		.irq		= 3,
 		.uartclk	= 1843200,
-		.regshift	= 2,
+		.regshift	= 0,
 		.iotype		= UPIO_PORT,
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 	},
diff --git a/include/asm-arm/arch-shark/io.h b/include/asm-arm/arch-shark/io.h
index 1e7f26bc2e1d..5e6ed0038b2b 100644
--- a/include/asm-arm/arch-shark/io.h
+++ b/include/asm-arm/arch-shark/io.h
@@ -21,38 +21,8 @@
  */
 #define __PORT_PCIO(x)	(!((x) & 0x80000000))
 
-/*
- * Dynamic IO functions - let the compiler
- * optimize the expressions
- */
-#define DECLARE_DYN_OUT(fnsuffix,instr)						\
-static inline void __out##fnsuffix (unsigned int value, unsigned int port)	\
-{										\
-	unsigned long temp;							\
-	__asm__ __volatile__(							\
-	"tst	%2, #0x80000000\n\t"						\
-	"mov	%0, %4\n\t"							\
-	"addeq	%0, %0, %3\n\t"							\
-	"str" instr "	%1, [%0, %2]	@ out" #fnsuffix			\
-	: "=&r" (temp)								\
-	: "r" (value), "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)	\
-	: "cc");								\
-}
+#define __io(a)                 ((void __iomem *)(PCIO_BASE + (a)))
 
-#define DECLARE_DYN_IN(sz,fnsuffix,instr)					\
-static inline unsigned sz __in##fnsuffix (unsigned int port)		\
-{										\
-	unsigned long temp, value;						\
-	__asm__ __volatile__(							\
-	"tst	%2, #0x80000000\n\t"						\
-	"mov	%0, %4\n\t"							\
-	"addeq	%0, %0, %3\n\t"							\
-	"ldr" instr "	%1, [%0, %2]	@ in" #fnsuffix				\
-	: "=&r" (temp), "=r" (value)						\
-	: "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)		\
-	: "cc");								\
-	return (unsigned sz)value;						\
-}
 
 static inline unsigned int __ioaddr (unsigned int port)			\
 {										\
@@ -62,123 +32,8 @@ static inline unsigned int __ioaddr (unsigned int port)			\
 		return (unsigned int)(IO_BASE + (port));			\
 }
 
-#define DECLARE_IO(sz,fnsuffix,instr)	\
-	DECLARE_DYN_OUT(fnsuffix,instr)	\
-	DECLARE_DYN_IN(sz,fnsuffix,instr)
-
-DECLARE_IO(char,b,"b")
-DECLARE_IO(short,w,"h")
-DECLARE_IO(long,l,"")
-
-#undef DECLARE_IO
-#undef DECLARE_DYN_OUT
-#undef DECLARE_DYN_IN
-
-/*
- * Constant address IO functions
- *
- * These have to be macros for the 'J' constraint to work -
- * +/-4096 immediate operand.
- */
-#define __outbc(value,port)							\
-({										\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"strb	%0, [%1, %2]		@ outbc"			\
-		: : "r" (value), "r" (PCIO_BASE), "Jr" (port));		\
-	else									\
-		__asm__ __volatile__(						\
-		"strb	%0, [%1, %2]		@ outbc"			\
-		: : "r" (value), "r" (IO_BASE), "r" (port));		\
-})
-
-#define __inbc(port)								\
-({										\
-	unsigned char result;                                                   \
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldrb	%0, [%1, %2]		@ inbc"				\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" (port));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldrb	%0, [%1, %2]		@ inbc"				\
-		: "=r" (result) : "r" (IO_BASE), "r" (port));		\
-	result;									\
-})
-
-#define __outwc(value,port)							\
-({										\
-	unsigned long v = value;						\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"strh	%0, [%1, %2]		@ outwc"			\
-		: : "r" (v|v<<16), "r" (PCIO_BASE), "Jr" (port));	\
-	else									\
-		__asm__ __volatile__(						\
-		"strh	%0, [%1, %2]		@ outwc"			\
-		: : "r" (v|v<<16), "r" (IO_BASE), "r" (port));		\
-})
-
-#define __inwc(port)								\
-({										\
-	unsigned short result;							\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldrh	%0, [%1, %2]		@ inwc"				\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" (port));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldrh	%0, [%1, %2]		@ inwc"				\
-		: "=r" (result) : "r" (IO_BASE), "r" (port));		\
-	result & 0xffff;							\
-})
-
-#define __outlc(value,port)								\
-({										\
-	unsigned long v = value;						\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]		@ outlc"			\
-		: : "r" (v), "r" (PCIO_BASE), "Jr" (port));		\
-	else									\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]		@ outlc"			\
-		: : "r" (v), "r" (IO_BASE), "r" (port));			\
-})
-
-#define __inlc(port)								\
-({										\
-	unsigned long result;							\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]		@ inlc"				\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" (port));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]		@ inlc"				\
-		: "=r" (result) : "r" (IO_BASE), "r" (port));		\
-	result;									\
-})
-
-#define __ioaddrc(port)								\
-({										\
-	unsigned long addr;							\
-	if (__PORT_PCIO((port)))						\
-		addr = PCIO_BASE + (port);				\
-	else									\
-		addr = IO_BASE + (port);					\
-	addr;									\
-})
-
 #define __mem_pci(addr) (addr)
 
-#define inb(p)	 	(__builtin_constant_p((p)) ? __inbc(p)    : __inb(p))
-#define inw(p)	 	(__builtin_constant_p((p)) ? __inwc(p)    : __inw(p))
-#define inl(p)	 	(__builtin_constant_p((p)) ? __inlc(p)    : __inl(p))
-#define outb(v,p)	(__builtin_constant_p((p)) ? __outbc(v,p) : __outb(v,p))
-#define outw(v,p)	(__builtin_constant_p((p)) ? __outwc(v,p) : __outw(v,p))
-#define outl(v,p)	(__builtin_constant_p((p)) ? __outlc(v,p) : __outl(v,p))
-
 /*
  * Translated address IO functions
  *