From f25181f598cf4a8ccc40a51d8b74f8b555ecddee Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy <lmcilroy@redhat.com>
Date: Thu, 23 Apr 2009 22:18:00 -0400
Subject: [PATCH 01/23] xfs_file_last_byte() needs to acquire ilock

We had some systems crash with this stack:

[<a00000010000cb20>] ia64_leave_kernel+0x0/0x280
[<a00000021291ca00>] xfs_bmbt_get_startoff+0x0/0x20 [xfs]
[<a0000002129080b0>] xfs_bmap_last_offset+0x210/0x280 [xfs]
[<a00000021295b010>] xfs_file_last_byte+0x70/0x1a0 [xfs]
[<a00000021295b200>] xfs_itruncate_start+0xc0/0x1a0 [xfs]
[<a0000002129935f0>] xfs_inactive_free_eofblocks+0x290/0x460 [xfs]
[<a000000212998fb0>] xfs_release+0x1b0/0x240 [xfs]
[<a0000002129ad930>] xfs_file_release+0x70/0xa0 [xfs]
[<a000000100162ea0>] __fput+0x1a0/0x420
[<a000000100163160>] fput+0x40/0x60

The problem here is that xfs_file_last_byte() does not acquire the
inode lock and can therefore race with another thread that is modifying
the extext list.  While xfs_bmap_last_offset() is trying to lookup
what was the last extent some extents were merged and the extent list
shrunk so the index we lookup is now beyond the end of the extent list
and potentially in a freed buffer.

Signed-off-by: Lachlan McIlroy <lmcilroy@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_inode.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e7ae08d1df48..123b20c8cbf2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1258,8 +1258,10 @@ xfs_file_last_byte(
 	 * necessary.
 	 */
 	if (ip->i_df.if_flags & XFS_IFEXTENTS) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		error = xfs_bmap_last_offset(NULL, ip, &last_block,
 			XFS_DATA_FORK);
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 		if (error) {
 			last_block = 0;
 		}

From 2ac00af7a6d2e65013e6f28bd1f37c0cd98ba134 Mon Sep 17 00:00:00 2001
From: Olaf Weber <olaf@sgi.com>
Date: Fri, 17 Apr 2009 16:12:45 -0500
Subject: [PATCH 02/23] xfs: add more checks to superblock validation

There had been reports where xfs filesystem was randomly
corrupted with fsfuzzer, and xfs failed to handle it
gracefully. This patch fixes couple of reported problem
by providing additional checks in the superblock
validation routine.

Signed-off-by: Olaf Weber <olaf@sgi.com>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_mount.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b101990df027..65a99725d0cc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -291,14 +291,17 @@ xfs_mount_validate_sb(
 	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
 	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
 	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
+	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
 	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
 	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
 	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
+	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
 	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
 	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
+	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
 	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||

From 4be4a00fb55879ef44b5914c189aecffa828deb4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 29 Apr 2009 10:50:48 -0400
Subject: [PATCH 03/23] xfs: a couple getbmap cleanups

 - reshuffle various conditionals for data vs attr fork to make the code
   more readable
 - do fine-grainded goto-based error handling
 - exit early from conditionals instead of keeping a long else branch around
 - allow kmem_alloc to fail

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_bmap.c | 164 ++++++++++++++++++++++------------------------
 1 file changed, 80 insertions(+), 84 deletions(-)

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3a6ed426327a..abe42448b1c0 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5880,7 +5880,7 @@ xfs_getbmap(
 	void			*arg)		/* formatter arg */
 {
 	__int64_t		bmvend;		/* last block requested */
-	int			error;		/* return value */
+	int			error = 0;	/* return value */
 	__int64_t		fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
 	int			lock;		/* lock state */
@@ -5899,30 +5899,8 @@ xfs_getbmap(
 
 	mp = ip->i_mount;
 	iflags = bmv->bmv_iflags;
-
 	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
 
-	/*	If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
-	 *	generate a DMAPI read event.  Otherwise, if the DM_EVENT_READ
-	 *	bit is set for the file, generate a read event in order
-	 *	that the DMAPI application may do its thing before we return
-	 *	the extents.  Usually this means restoring user file data to
-	 *	regions of the file that look like holes.
-	 *
-	 *	The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
-	 *	BMV_IF_NO_DMAPI_READ so that read events are generated.
-	 *	If this were not true, callers of ioctl( XFS_IOC_GETBMAP )
-	 *	could misinterpret holes in a DMAPI file as true holes,
-	 *	when in fact they may represent offline user data.
-	 */
-	if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
-	    DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
-	    whichfork == XFS_DATA_FORK) {
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
-		if (error)
-			return XFS_ERROR(error);
-	}
-
 	if (whichfork == XFS_ATTR_FORK) {
 		if (XFS_IFORK_Q(ip)) {
 			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
@@ -5936,11 +5914,37 @@ xfs_getbmap(
 					 ip->i_mount);
 			return XFS_ERROR(EFSCORRUPTED);
 		}
-	} else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		   ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
-		return XFS_ERROR(EINVAL);
-	if (whichfork == XFS_DATA_FORK) {
+
+		prealloced = 0;
+		fixlen = 1LL << 32;
+	} else {
+		/*
+		 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
+		 * not generate a DMAPI read event.  Otherwise, if the
+		 * DM_EVENT_READ bit is set for the file, generate a read
+		 * event in order that the DMAPI application may do its thing
+		 * before we return the extents.  Usually this means restoring
+		 * user file data to regions of the file that look like holes.
+		 *
+		 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
+		 * BMV_IF_NO_DMAPI_READ so that read events are generated.
+		 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
+		 * could misinterpret holes in a DMAPI file as true holes,
+		 * when in fact they may represent offline user data.
+		 */
+		if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
+		    !(iflags & BMV_IF_NO_DMAPI_READ)) {
+			error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
+					      0, 0, 0, NULL);
+			if (error)
+				return XFS_ERROR(error);
+		}
+
+		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+			return XFS_ERROR(EINVAL);
+
 		if (xfs_get_extsz_hint(ip) ||
 		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
@@ -5949,42 +5953,34 @@ xfs_getbmap(
 			prealloced = 0;
 			fixlen = ip->i_size;
 		}
-	} else {
-		prealloced = 0;
-		fixlen = 1LL << 32;
 	}
 
 	if (bmv->bmv_length == -1) {
 		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset),
-					(__int64_t)0);
-	} else if (bmv->bmv_length < 0)
-		return XFS_ERROR(EINVAL);
-	if (bmv->bmv_length == 0) {
+		bmv->bmv_length =
+			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+	} else if (bmv->bmv_length == 0) {
 		bmv->bmv_entries = 0;
 		return 0;
+	} else if (bmv->bmv_length < 0) {
+		return XFS_ERROR(EINVAL);
 	}
+
 	nex = bmv->bmv_count - 1;
 	if (nex <= 0)
 		return XFS_ERROR(EINVAL);
 	bmvend = bmv->bmv_offset + bmv->bmv_length;
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-	if (((iflags & BMV_IF_DELALLOC) == 0) &&
-	    (whichfork == XFS_DATA_FORK) &&
-	    (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
-		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
-		error = xfs_flush_pages(ip, (xfs_off_t)0,
-					       -1, 0, FI_REMAPF);
-		if (error) {
-			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-		return error;
+	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
+		if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
+			error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
+			if (error)
+				goto out_unlock_iolock;
 		}
-	}
 
-	ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) ||
-	       ip->i_delayed_blks == 0);
+		ASSERT(ip->i_delayed_blks == 0);
+	}
 
 	lock = xfs_ilock_map_shared(ip);
 
@@ -5995,23 +5991,25 @@ xfs_getbmap(
 	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
 		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
 
-	bmapi_flags = xfs_bmapi_aflag(whichfork) |
-			((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE);
+	bmapi_flags = xfs_bmapi_aflag(whichfork);
+	if (!(iflags & BMV_IF_PREALLOC))
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
 
 	/*
 	 * Allocate enough space to handle "subnex" maps at a time.
 	 */
+	error = ENOMEM;
 	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP);
+	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
+	if (!map)
+		goto out_unlock_ilock;
 
 	bmv->bmv_entries = 0;
 
-	if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) {
-		if (((iflags & BMV_IF_DELALLOC) == 0) ||
-		    whichfork == XFS_ATTR_FORK) {
-			error = 0;
-			goto unlock_and_return;
-		}
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
+	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
+		error = 0;
+		goto out_free_map;
 	}
 
 	nexleft = nex;
@@ -6023,10 +6021,12 @@ xfs_getbmap(
 				  bmapi_flags, NULL, 0, map, &nmap,
 				  NULL, NULL);
 		if (error)
-			goto unlock_and_return;
+			goto out_free_map;
 		ASSERT(nmap <= subnex);
 
 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
+			int full = 0;	/* user array is full */
+
 			out.bmv_oflags = 0;
 			if (map[i].br_state == XFS_EXT_UNWRITTEN)
 				out.bmv_oflags |= BMV_OF_PREALLOC;
@@ -6041,36 +6041,32 @@ xfs_getbmap(
 			    whichfork == XFS_ATTR_FORK) {
 				/* came to the end of attribute fork */
 				out.bmv_oflags |= BMV_OF_LAST;
-				goto unlock_and_return;
-			} else {
-				int full = 0;	/* user array is full */
-
-				if (!xfs_getbmapx_fix_eof_hole(ip, &out,
-							prealloced, bmvend,
-							map[i].br_startblock)) {
-					goto unlock_and_return;
-				}
-
-				/* format results & advance arg */
-				error = formatter(&arg, &out, &full);
-				if (error || full)
-					goto unlock_and_return;
-				nexleft--;
-				bmv->bmv_offset =
-					out.bmv_offset + out.bmv_length;
-				bmv->bmv_length = MAX((__int64_t)0,
-					(__int64_t)(bmvend - bmv->bmv_offset));
-				bmv->bmv_entries++;
+				goto out_free_map;
 			}
+
+			if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced,
+					bmvend, map[i].br_startblock))
+				goto out_free_map;
+
+			/* format results & advance arg */
+			error = formatter(&arg, &out, &full);
+			if (error || full)
+				goto out_free_map;
+			nexleft--;
+			bmv->bmv_offset =
+				out.bmv_offset + out.bmv_length;
+			bmv->bmv_length =
+				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+			bmv->bmv_entries++;
 		}
 	} while (nmap && nexleft && bmv->bmv_length);
 
-unlock_and_return:
-	xfs_iunlock_map_shared(ip, lock);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
+ out_free_map:
 	kmem_free(map);
-
+ out_unlock_ilock:
+	xfs_iunlock_map_shared(ip, lock);
+ out_unlock_iolock:
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return error;
 }
 

From 6321e3ed2acf3ee9643cdd403e1c88605d7944ba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 24 Feb 2009 08:39:02 -0500
Subject: [PATCH 04/23] xfs: fix getbmap vs mmap deadlock

xfs_getbmap (or rather the formatters called by it) copy out the getbmap
structures under the ilock, which can deadlock against mmap.  This has
been reported via bugzilla a while ago (#717) and has recently also
shown up via lockdep.

So allocate a temporary buffer to format the kernel getbmap structures
into and then copy them out after dropping the locks.

A little problem with this is that we limit the number of extents we
can copy out by the maximum allocation size, but I see no real way
around that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_bmap.c | 52 +++++++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index abe42448b1c0..ca7c6005a487 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5890,12 +5890,13 @@ xfs_getbmap(
 	int			nexleft;	/* # of user extents left */
 	int			subnex;		/* # of bmapi's can do */
 	int			nmap;		/* number of map entries */
-	struct getbmapx		out;		/* output structure */
+	struct getbmapx		*out;		/* output structure */
 	int			whichfork;	/* data or attr fork */
 	int			prealloced;	/* this is a file with
 						 * preallocated data space */
 	int			iflags;		/* interface flags */
 	int			bmapi_flags;	/* flags for xfs_bmapi */
+	int			cur_ext = 0;
 
 	mp = ip->i_mount;
 	iflags = bmv->bmv_iflags;
@@ -5971,6 +5972,13 @@ xfs_getbmap(
 		return XFS_ERROR(EINVAL);
 	bmvend = bmv->bmv_offset + bmv->bmv_length;
 
+
+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+		return XFS_ERROR(ENOMEM);
+	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
+	if (!out)
+		return XFS_ERROR(ENOMEM);
+
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
 		if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
@@ -6025,39 +6033,39 @@ xfs_getbmap(
 		ASSERT(nmap <= subnex);
 
 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
-			int full = 0;	/* user array is full */
-
-			out.bmv_oflags = 0;
+			out[cur_ext].bmv_oflags = 0;
 			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out.bmv_oflags |= BMV_OF_PREALLOC;
+				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
 			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out.bmv_oflags |= BMV_OF_DELALLOC;
-			out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out.bmv_unused1 = out.bmv_unused2 = 0;
+				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
+			out[cur_ext].bmv_offset =
+				XFS_FSB_TO_BB(mp, map[i].br_startoff);
+			out[cur_ext].bmv_length =
+				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+			out[cur_ext].bmv_unused1 = 0;
+			out[cur_ext].bmv_unused2 = 0;
 			ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
 			      (map[i].br_startblock != DELAYSTARTBLOCK));
                         if (map[i].br_startblock == HOLESTARTBLOCK &&
 			    whichfork == XFS_ATTR_FORK) {
 				/* came to the end of attribute fork */
-				out.bmv_oflags |= BMV_OF_LAST;
+				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
 				goto out_free_map;
 			}
 
-			if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced,
-					bmvend, map[i].br_startblock))
+			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
+					prealloced, bmvend,
+					map[i].br_startblock))
 				goto out_free_map;
 
-			/* format results & advance arg */
-			error = formatter(&arg, &out, &full);
-			if (error || full)
-				goto out_free_map;
 			nexleft--;
 			bmv->bmv_offset =
-				out.bmv_offset + out.bmv_length;
+				out[cur_ext].bmv_offset +
+				out[cur_ext].bmv_length;
 			bmv->bmv_length =
 				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
 			bmv->bmv_entries++;
+			cur_ext++;
 		}
 	} while (nmap && nexleft && bmv->bmv_length);
 
@@ -6067,6 +6075,16 @@ xfs_getbmap(
 	xfs_iunlock_map_shared(ip, lock);
  out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	for (i = 0; i < cur_ext; i++) {
+		int full = 0;	/* user array is full */
+
+		/* format results & advance arg */
+		error = formatter(&arg, &out[i], &full);
+		if (error || full)
+			break;
+	}
+
 	return error;
 }
 

From ec91d1335f478c5cd089d82ffbf936075c5f24c8 Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Thu, 7 May 2009 19:49:45 -0500
Subject: [PATCH 05/23] xfs: fix double unlock in xfs_swap_extents()

Regreesion from commit ef8f7fc, which rearranged the code in
xfs_swap_extents() leading to double unlock of xfs inode ilock.
That resulted in xfs_fsr deadlocking itself on platforms, which
don't handle double unlock of rw_semaphore nicely. It caused the
count go negative, which represents the write holder, without
really having one. ia64 is one of the platforms where deadlock
was easily reproduced and the fix was tested.

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_dfrag.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6d839bddbf0..7465f9ee125f 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -347,13 +347,15 @@ xfs_swap_extents(
 
 	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
 
-out_unlock:
-	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 out:
 	kmem_free(tempifp);
 	return error;
 
+out_unlock:
+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	goto out;
+
 out_trans_cancel:
 	xfs_trans_cancel(tp, 0);
 	goto out_unlock;

From 096324873f9c7172a17aff9db1356f4f01b77afe Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Sat, 23 May 2009 14:30:12 -0500
Subject: [PATCH 06/23] xfs: fix overflow in xfs_growfs_data_private

In the case where growing a filesystem would leave the last AG
too small, the fixup code has an overflow in the calculation
of the new size with one fewer ag, because "nagcount" is a 32
bit number.  If the new filesystem has > 2^32 blocks in it
this causes a problem resulting in an EINVAL return from growfs:

 # xfs_io -f -c "truncate 19998630180864" fsfile
 # mkfs.xfs -f -bsize=4096 -dagsize=76288719b,size=3905982455b fsfile
 # mount -o loop fsfile /mnt
 # xfs_growfs /mnt

meta-data=/dev/loop0             isize=256    agcount=52,
agsize=76288719 blks
         =                       sectsz=512   attr=2
data     =                       bsize=4096   blocks=3905982455, imaxpct=5
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0
log      =internal               bsize=4096   blocks=32768, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=0
realtime =none                   extsz=4096   blocks=0, rtextents=0
xfs_growfs: XFS_IOC_FSGROWFSDATA xfsctl failed: Invalid argument

Reported-by: richard.ems@cape-horn-eng.com
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_fsops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8379e3bca26c..cbd451bb4848 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -160,7 +160,7 @@ xfs_growfs_data_private(
 	nagcount = new + (nb_mod != 0);
 	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
 		nagcount--;
-		nb = nagcount * mp->m_sb.sb_agblocks;
+		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
 		if (nb < mp->m_sb.sb_dblocks)
 			return XFS_ERROR(EINVAL);
 	}

From 4156e735d3abde8e9243b5d22f7999dd3fffab2e Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Mon, 1 Jun 2009 13:13:24 -0500
Subject: [PATCH 07/23] xfs: prevent deadlock in xfs_qm_shake()

It's possible to recurse into filesystem from the memory
allocation, which deadlocks in xfs_qm_shake(). Add check
for __GFP_FS, and bail out if it is not set.

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Hedi Berriche <hedi@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/linux-2.6/kmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index af6843c7ee4b..179cbd630f69 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
 static inline int
 kmem_shake_allow(gfp_t gfp_mask)
 {
-	return (gfp_mask & __GFP_WAIT) != 0;
+	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
 }
 
 #endif /* __XFS_SUPPORT_KMEM_H__ */

From e1696834e8a15d7ef9ae8ffdffe00bac1399a2e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:32:59 +0200
Subject: [PATCH 08/23] xfs: update max log size

Commit a6634fba3dec4a92f0a2c4e30c80b634c0576ad5 in xfsprogs increased the
maximum log size supported by mkfs.  Merged back the changes to xfs_fs.h
so the growfs enforced the same limit and the headers are in sync.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/xfs_fs.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fac8229..c4ea51b55dce 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
  * Minimum and maximum sizes need for growth checks
  */
 #define XFS_MIN_AG_BLOCKS	64
-#define XFS_MIN_LOG_BLOCKS	512
-#define XFS_MAX_LOG_BLOCKS	(64 * 1024)
-#define XFS_MIN_LOG_BYTES	(256 * 1024)
-#define XFS_MAX_LOG_BYTES	(128 * 1024 * 1024)
+#define XFS_MIN_LOG_BLOCKS	512ULL
+#define XFS_MAX_LOG_BLOCKS	(1024 * 1024ULL)
+#define XFS_MIN_LOG_BYTES	(10 * 1024 * 1024ULL)
+
+/* keep the maximum size under 2^31 by a small amount */
+#define XFS_MAX_LOG_BYTES \
+	((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
 
 /*
  * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT

From 0c5e1ce89f1eacc366ec421c0f5f681159479c28 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:33:21 +0200
Subject: [PATCH 09/23] xfs: validate quota log items during log recovery

Arkadiusz has seen really strange crashes in xfs_qm_dqcheck that
I can only explain by a log item being too smal to actually fit the
xfs_dqblk_t we're dereferencing all over xfs_qm_dqcheck.  So add
graceful checks for NULL or too small quota items to the log recovery
code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/xfs_log_recover.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba450116d4f..47da2fb45377 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
 		error = 0;
 		if (buf_f->blf_flags &
 		   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+			if (item->ri_buf[i].i_addr == NULL) {
+				cmn_err(CE_ALERT,
+					"XFS: NULL dquot in %s.", __func__);
+				goto next;
+			}
+			if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) {
+				cmn_err(CE_ALERT,
+					"XFS: dquot too small (%d) in %s.",
+					item->ri_buf[i].i_len, __func__);
+				goto next;
+			}
 			error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
 					       item->ri_buf[i].i_addr,
 					       -1, 0, XFS_QMOPT_DOWARN,
 					       "dquot_buf_recover");
+			if (error)
+				goto next;
 		}
-		if (!error)
-			memcpy(xfs_buf_offset(bp,
-				(uint)bit << XFS_BLI_SHIFT),	/* dest */
-				item->ri_buf[i].i_addr,		/* source */
-				nbits<<XFS_BLI_SHIFT);		/* length */
+
+		memcpy(xfs_buf_offset(bp,
+			(uint)bit << XFS_BLI_SHIFT),	/* dest */
+			item->ri_buf[i].i_addr,		/* source */
+			nbits<<XFS_BLI_SHIFT);		/* length */
+ next:
 		i++;
 		bit += nbits;
 	}
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
 		return (0);
 
 	recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
-	ASSERT(recddq);
+
+	if (item->ri_buf[1].i_addr == NULL) {
+		cmn_err(CE_ALERT,
+			"XFS: NULL dquot in %s.", __func__);
+		return XFS_ERROR(EIO);
+	}
+	if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) {
+		cmn_err(CE_ALERT,
+			"XFS: dquot too small (%d) in %s.",
+			item->ri_buf[1].i_len, __func__);
+		return XFS_ERROR(EIO);
+	}
+
 	/*
 	 * This type of quotas was turned off, so ignore this record.
 	 */

From 7d095257e321214e4cf359abd131ba1f09c60cba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:33:32 +0200
Subject: [PATCH 10/23] xfs: kill xfs_qmops

Kill the quota ops function vector and replace it with direct calls or
stubs in the CONFIG_XFS_QUOTA=n case.

Make sure we check XFS_IS_QUOTA_RUNNING in the right spots.  We can remove
the number of those checks because the XFS_TRANS_DQ_DIRTY flag can't be set
otherwise.

This brings us back closer to the way this code worked in IRIX and earlier
Linux versions, but we keep a lot of the more useful factoring of common
code.

Eventually we should also kill xfs_qm_bhv.c, but that's left for a later
patch.

Reduces the size of the source code by about 250 lines and the size of
XFS module by about 1.5 kilobytes with quotas enabled:

   text	   data	    bss	    dec	    hex	filename
 615957	   2960	   3848	 622765	  980ad	fs/xfs/xfs.o
 617231	   3152	   3848	 624231	  98667	fs/xfs/xfs.o.old

Fallout:

 - xfs_qm_dqattach is split into xfs_qm_dqattach_locked which expects
   the inode locked and xfs_qm_dqattach which does the locking around it,
   thus removing XFS_QMOPT_ILOCKED.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/Makefile                |   3 +-
 fs/xfs/linux-2.6/xfs_ioctl.c   |  24 +++---
 fs/xfs/linux-2.6/xfs_super.c   |  22 +++--
 fs/xfs/linux-2.6/xfs_sync.c    |   7 +-
 fs/xfs/quota/xfs_dquot.c       |   4 +-
 fs/xfs/quota/xfs_dquot.h       |   1 -
 fs/xfs/quota/xfs_qm.c          | 139 ++++++++++++++++--------------
 fs/xfs/quota/xfs_qm.h          |  21 -----
 fs/xfs/quota/xfs_qm_bhv.c      |  76 +----------------
 fs/xfs/quota/xfs_trans_dquot.c |  65 ++++++--------
 fs/xfs/xfs_attr.c              |  12 +--
 fs/xfs/xfs_bmap.c              |  33 +++----
 fs/xfs/xfs_bmap_btree.c        |   4 +-
 fs/xfs/xfs_iget.c              |   5 +-
 fs/xfs/xfs_iomap.c             |  12 +--
 fs/xfs/xfs_mount.c             | 103 ++++++++++++++++++----
 fs/xfs/xfs_mount.h             |  84 +-----------------
 fs/xfs/xfs_qmops.c             | 152 ---------------------------------
 fs/xfs/xfs_quota.h             | 116 ++++++++++++-------------
 fs/xfs/xfs_rename.c            |   3 +-
 fs/xfs/xfs_trans.c             |  15 ++--
 fs/xfs/xfs_utils.c             |   2 +-
 fs/xfs/xfs_vnodeops.c          |  99 ++++++++++-----------
 23 files changed, 377 insertions(+), 625 deletions(-)
 delete mode 100644 fs/xfs/xfs_qmops.c

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e47fe9..222c59e541ce 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,8 +88,7 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_utils.o \
 				   xfs_vnodeops.o \
 				   xfs_rw.o \
-				   xfs_dmops.o \
-				   xfs_qmops.o
+				   xfs_dmops.o
 
 xfs-$(CONFIG_XFS_TRACE)		+= xfs_btree_trace.o \
 				   xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7d..c7d684f02f89 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -908,7 +908,8 @@ xfs_ioctl_setattr(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	unsigned int		lock_flags = 0;
-	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
 	struct xfs_dquot	*olddquot = NULL;
 	int			code;
 
@@ -928,7 +929,7 @@ xfs_ioctl_setattr(
 	 * because the i_*dquot fields will get updated anyway.
 	 */
 	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-		code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
 					 ip->i_d.di_gid, fa->fsx_projid,
 					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
 		if (code)
@@ -963,10 +964,11 @@ xfs_ioctl_setattr(
 	 * Do a quota reservation only if projid is actually going to change.
 	 */
 	if (mask & FSX_PROJID) {
-		if (XFS_IS_PQUOTA_ON(mp) &&
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    XFS_IS_PQUOTA_ON(mp) &&
 		    ip->i_d.di_projid != fa->fsx_projid) {
 			ASSERT(tp);
-			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -1068,8 +1070,8 @@ xfs_ioctl_setattr(
 		 * in the transaction.
 		 */
 		if (ip->i_d.di_projid != fa->fsx_projid) {
-			if (XFS_IS_PQUOTA_ON(mp)) {
-				olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+				olddquot = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_projid = fa->fsx_projid;
@@ -1115,9 +1117,9 @@ xfs_ioctl_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	XFS_QM_DQRELE(mp, olddquot);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(olddquot);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (code)
 		return code;
@@ -1131,8 +1133,8 @@ xfs_ioctl_setattr(
 	return 0;
 
  error_return:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 	xfs_trans_cancel(tp, 0);
 	if (lock_flags)
 		xfs_iunlock(ip, lock_flags);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bb685269f832..0d9b64b219e0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -405,6 +405,14 @@ xfs_parseargs(
 		return EINVAL;
 	}
 
+#ifndef CONFIG_XFS_QUOTA
+	if (XFS_IS_QUOTA_RUNNING(mp)) {
+		cmn_err(CE_WARN,
+			"XFS: quota support not available in this kernel.");
+		return EINVAL;
+	}
+#endif
+
 	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
 	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
 		cmn_err(CE_WARN,
@@ -1098,7 +1106,6 @@ xfs_fs_put_super(
 	xfs_freesb(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
-	xfs_qmops_put(mp);
 	xfs_dmops_put(mp);
 	xfs_free_fsname(mp);
 	kfree(mp);
@@ -1168,6 +1175,7 @@ xfs_fs_statfs(
 {
 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
 	xfs_sb_t		*sbp = &mp->m_sb;
+	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
 	xfs_extlen_t		lsize;
 
@@ -1196,7 +1204,10 @@ xfs_fs_statfs(
 	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
 	spin_unlock(&mp->m_sb_lock);
 
-	XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+		xfs_qm_statvfs(ip, statp);
 	return 0;
 }
 
@@ -1404,16 +1415,13 @@ xfs_fs_fill_super(
 	error = xfs_dmops_get(mp);
 	if (error)
 		goto out_free_fsname;
-	error = xfs_qmops_get(mp);
-	if (error)
-		goto out_put_dmops;
 
 	if (silent)
 		flags |= XFS_MFSI_QUIET;
 
 	error = xfs_open_devices(mp);
 	if (error)
-		goto out_put_qmops;
+		goto out_put_dmops;
 
 	if (xfs_icsb_init_counters(mp))
 		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1482,8 +1490,6 @@ xfs_fs_fill_super(
  out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
- out_put_qmops:
-	xfs_qmops_put(mp);
  out_put_dmops:
 	xfs_dmops_put(mp);
  out_free_fsname:
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba76633c29..b06b95c154cc 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,6 +43,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_rw.h"
+#include "xfs_quota.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -317,12 +318,12 @@ xfs_quiesce_data(
 
 	/* push non-blocking */
 	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
-	XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+	xfs_qm_sync(mp, SYNC_BDFLUSH);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
 	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
-	XFS_QM_DQSYNC(mp, SYNC_WAIT);
+	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
 	error = xfs_sync_fsdata(mp, 0);
@@ -467,7 +468,7 @@ xfs_sync_worker(
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
 		xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
-		error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
 		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
 		if (xfs_log_need_covered(mp))
 			error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc63423..4d6d051ee56e 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1194,7 +1194,9 @@ void
 xfs_qm_dqrele(
 	xfs_dquot_t	*dqp)
 {
-	ASSERT(dqp);
+	if (!dqp)
+		return;
+
 	xfs_dqtrace_entry(dqp, "DQRELE");
 
 	xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402ddb4c..6533ead9b889 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
-extern void		xfs_qm_dqrele(xfs_dquot_t *);
 extern void		xfs_dqlock(xfs_dquot_t *);
 extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
 extern void		xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b6695049e00..aa5d8212661c 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -287,11 +287,13 @@ xfs_qm_rele_quotafs_ref(
  * Just destroy the quotainfo structure.
  */
 void
-xfs_qm_unmount_quotadestroy(
-	xfs_mount_t	*mp)
+xfs_qm_unmount(
+	struct xfs_mount	*mp)
 {
-	if (mp->m_quotainfo)
+	if (mp->m_quotainfo) {
+		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 		xfs_qm_destroy_quotainfo(mp);
+	}
 }
 
 
@@ -385,8 +387,13 @@ xfs_qm_mount_quotas(
 	if (error) {
 		xfs_fs_cmn_err(CE_WARN, mp,
 			"Failed to initialize disk quotas.");
+		return;
 	}
-	return;
+
+#ifdef QUOTADEBUG
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_qm_internalqcheck(mp);
+#endif
 }
 
 /*
@@ -774,12 +781,11 @@ xfs_qm_dqattach_grouphint(
  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
  * into account.
  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
  * Inode may get unlocked and relocked in here, and the caller must deal with
  * the consequences.
  */
 int
-xfs_qm_dqattach(
+xfs_qm_dqattach_locked(
 	xfs_inode_t	*ip,
 	uint		flags)
 {
@@ -787,17 +793,14 @@ xfs_qm_dqattach(
 	uint		nquotas = 0;
 	int		error = 0;
 
-	if ((! XFS_IS_QUOTA_ON(mp)) ||
-	    (! XFS_NOT_DQATTACHED(mp, ip)) ||
-	    (ip->i_ino == mp->m_sb.sb_uquotino) ||
-	    (ip->i_ino == mp->m_sb.sb_gquotino))
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
+	    !XFS_NOT_DQATTACHED(mp, ip) ||
+	    ip->i_ino == mp->m_sb.sb_uquotino ||
+	    ip->i_ino == mp->m_sb.sb_gquotino)
 		return 0;
 
-	ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
-	       xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	if (XFS_IS_UQUOTA_ON(mp)) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +852,7 @@ xfs_qm_dqattach(
 		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
 	}
 
-      done:
-
+ done:
 #ifdef QUOTADEBUG
 	if (! error) {
 		if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +860,22 @@ xfs_qm_dqattach(
 		if (XFS_IS_OQUOTA_ON(mp))
 			ASSERT(ip->i_gdquot);
 	}
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 #endif
+	return error;
+}
 
-	if (! (flags & XFS_QMOPT_ILOCKED))
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+int
+xfs_qm_dqattach(
+	struct xfs_inode	*ip,
+	uint			flags)
+{
+	int			error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_qm_dqattach_locked(ip, flags);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-#ifdef QUOTADEBUG
-	else
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
 	return error;
 }
 
@@ -912,7 +921,7 @@ xfs_qm_sync(
 	boolean_t	nowait;
 	int		error;
 
-	if (! XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	restarts = 0;
@@ -2319,20 +2328,20 @@ xfs_qm_write_sb_changes(
  */
 int
 xfs_qm_vop_dqalloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	uid_t		uid,
-	gid_t		gid,
-	prid_t		prid,
-	uint		flags,
-	xfs_dquot_t	**O_udqpp,
-	xfs_dquot_t	**O_gdqpp)
+	struct xfs_inode	*ip,
+	uid_t			uid,
+	gid_t			gid,
+	prid_t			prid,
+	uint			flags,
+	struct xfs_dquot	**O_udqpp,
+	struct xfs_dquot	**O_gdqpp)
 {
-	int		error;
-	xfs_dquot_t	*uq, *gq;
-	uint		lockflags;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_dquot	*uq, *gq;
+	int			error;
+	uint			lockflags;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2355,8 @@ xfs_qm_vop_dqalloc(
 	 * if necessary. The dquot(s) will not be locked.
 	 */
 	if (XFS_NOT_DQATTACHED(mp, ip)) {
-		if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
-					    XFS_QMOPT_ILOCKED))) {
+		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+		if (error) {
 			xfs_iunlock(ip, lockflags);
 			return error;
 		}
@@ -2469,6 +2478,7 @@ xfs_qm_vop_chown(
 	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
 				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
 
+
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 
@@ -2508,13 +2518,13 @@ xfs_qm_vop_chown_reserve(
 	xfs_dquot_t	*gdqp,
 	uint		flags)
 {
-	int		error;
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = ip->i_mount;
 	uint		delblks, blkflags, prjflags = 0;
 	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
+	int		error;
+
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-	mp = ip->i_mount;
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
 	delblks = ip->i_delayed_blks;
@@ -2582,28 +2592,23 @@ xfs_qm_vop_chown_reserve(
 
 int
 xfs_qm_vop_rename_dqattach(
-	xfs_inode_t	**i_tab)
+	struct xfs_inode	**i_tab)
 {
-	xfs_inode_t	*ip;
-	int		i;
-	int		error;
+	struct xfs_mount	*mp = i_tab[0]->i_mount;
+	int			i;
 
-	ip = i_tab[0];
-
-	if (! XFS_IS_QUOTA_ON(ip->i_mount))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
-	if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
-		error = xfs_qm_dqattach(ip, 0);
-		if (error)
-			return error;
-	}
-	for (i = 1; (i < 4 && i_tab[i]); i++) {
+	for (i = 0; (i < 4 && i_tab[i]); i++) {
+		struct xfs_inode	*ip = i_tab[i];
+		int			error;
+
 		/*
 		 * Watch out for duplicate entries in the table.
 		 */
-		if ((ip = i_tab[i]) != i_tab[i-1]) {
-			if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+		if (i == 0 || ip != i_tab[i-1]) {
+			if (XFS_NOT_DQATTACHED(mp, ip)) {
 				error = xfs_qm_dqattach(ip, 0);
 				if (error)
 					return error;
@@ -2614,17 +2619,19 @@ xfs_qm_vop_rename_dqattach(
 }
 
 void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp)
+xfs_qm_vop_create_dqattach(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_dquot	*udqp,
+	struct xfs_dquot	*gdqp)
 {
-	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+	struct xfs_mount	*mp = tp->t_mountp;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
 	if (udqp) {
 		xfs_dqlock(udqp);
@@ -2632,7 +2639,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(udqp);
 		ASSERT(ip->i_udquot == NULL);
 		ip->i_udquot = udqp;
-		ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
+		ASSERT(XFS_IS_UQUOTA_ON(mp));
 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
@@ -2642,8 +2649,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(gdqp);
 		ASSERT(ip->i_gdquot == NULL);
 		ip->i_gdquot = gdqp;
-		ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
-		ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+		ASSERT(XFS_IS_OQUOTA_ON(mp));
+		ASSERT((XFS_IS_GQUOTA_ON(mp) ?
 			ip->i_d.di_gid : ip->i_d.di_projid) ==
 				be32_to_cpu(gdqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954cae1b..495564b8af38 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
 } xfs_quotainfo_t;
 
 
-extern xfs_dqtrxops_t	xfs_trans_dquot_ops;
-
 extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
 extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
 			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_RTBWARNLIMIT	5
 
 extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void		xfs_qm_mount_quotas(xfs_mount_t *);
 extern int		xfs_qm_quotacheck(xfs_mount_t *);
-extern void		xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern void		xfs_qm_unmount_quotas(xfs_mount_t *);
 extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int		xfs_qm_sync(xfs_mount_t *, int);
 
 /* dquot stuff */
 extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int		xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void		xfs_qm_dqdetach(xfs_inode_t *);
 extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
@@ -183,19 +175,6 @@ extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
 extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
 extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
 
-/* vop stuff */
-extern int		xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
-					uid_t, gid_t, prid_t, uint,
-					xfs_dquot_t **, xfs_dquot_t **);
-extern void		xfs_qm_vop_dqattach_and_dqmod_newinode(
-					xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t *, xfs_dquot_t *);
-extern int		xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t *	xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t **, xfs_dquot_t *);
-extern int		xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
-					xfs_dquot_t *, xfs_dquot_t *, uint);
-
 /* list stuff */
 extern void		xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
 extern void		xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c689a4b..56a5965f3c8b 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -84,7 +84,7 @@ xfs_fill_statvfs_from_dquot(
  * return a statvfs of the project, not the entire filesystem.
  * This makes such trees appear as if they are filesystems in themselves.
  */
-STATIC void
+void
 xfs_qm_statvfs(
 	xfs_inode_t		*ip,
 	struct kstatfs		*statp)
@@ -92,20 +92,13 @@ xfs_qm_statvfs(
 	xfs_mount_t		*mp = ip->i_mount;
 	xfs_dquot_t		*dqp;
 
-	if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-	    !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-	    		      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-		return;
-
 	if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
-		xfs_disk_dquot_t	*dp = &dqp->q_core;
-
-		xfs_fill_statvfs_from_dquot(statp, dp);
+		xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
 		xfs_qm_dqput(dqp);
 	}
 }
 
-STATIC int
+int
 xfs_qm_newmount(
 	xfs_mount_t	*mp,
 	uint		*needquotamount,
@@ -114,9 +107,6 @@ xfs_qm_newmount(
 	uint		quotaondisk;
 	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
 
-	*quotaflags = 0;
-	*needquotamount = B_FALSE;
-
 	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
 				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
@@ -179,66 +169,6 @@ xfs_qm_newmount(
 	return 0;
 }
 
-STATIC int
-xfs_qm_endmount(
-	xfs_mount_t	*mp,
-	uint		needquotamount,
-	uint		quotaflags)
-{
-	if (needquotamount) {
-		ASSERT(mp->m_qflags == 0);
-		mp->m_qflags = quotaflags;
-		xfs_qm_mount_quotas(mp);
-	}
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-	if (! (XFS_IS_QUOTA_ON(mp)))
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
-	else
-		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
-	if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
-		cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
-	return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
-	xfs_dquot_t	*dq)
-{
-	/*
-	 * Called from XFS, where we always check first for a NULL dquot.
-	 */
-	if (!dq)
-		return;
-	xfs_qm_dqrele(dq);
-}
-
-
-struct xfs_qmops xfs_qmcore_xfs = {
-	.xfs_qminit		= xfs_qm_newmount,
-	.xfs_qmdone		= xfs_qm_unmount_quotadestroy,
-	.xfs_qmmount		= xfs_qm_endmount,
-	.xfs_qmunmount		= xfs_qm_unmount_quotas,
-	.xfs_dqrele		= xfs_qm_dqrele_null,
-	.xfs_dqattach		= xfs_qm_dqattach,
-	.xfs_dqdetach		= xfs_qm_dqdetach,
-	.xfs_dqpurgeall		= xfs_qm_dqpurge_all,
-	.xfs_dqvopalloc		= xfs_qm_vop_dqalloc,
-	.xfs_dqvopcreate	= xfs_qm_vop_dqattach_and_dqmod_newinode,
-	.xfs_dqvoprename	= xfs_qm_vop_rename_dqattach,
-	.xfs_dqvopchown		= xfs_qm_vop_chown,
-	.xfs_dqvopchownresv	= xfs_qm_vop_chown_reserve,
-	.xfs_dqstatvfs		= xfs_qm_statvfs,
-	.xfs_dqsync		= xfs_qm_sync,
-	.xfs_dqtrxops		= &xfs_trans_dquot_ops,
-};
-EXPORT_SYMBOL(xfs_qmcore_xfs);
-
 void __init
 xfs_qm_init(void)
 {
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173bcf96d..eafa7ab34085 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -111,7 +111,7 @@ xfs_trans_log_dquot(
  * Carry forward whatever is left of the quota blk reservation to
  * the spanky new transaction
  */
-STATIC void
+void
 xfs_trans_dup_dqinfo(
 	xfs_trans_t	*otp,
 	xfs_trans_t	*ntp)
@@ -167,19 +167,17 @@ xfs_trans_dup_dqinfo(
 /*
  * Wrap around mod_dquot to account for both user and group quotas.
  */
-STATIC void
+void
 xfs_trans_mod_dquot_byino(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*ip,
 	uint		field,
 	long		delta)
 {
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = tp->t_mountp;
 
-	ASSERT(tp);
-	mp = tp->t_mountp;
-
-	if (!XFS_IS_QUOTA_ON(mp) ||
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
 	    ip->i_ino == mp->m_sb.sb_uquotino ||
 	    ip->i_ino == mp->m_sb.sb_gquotino)
 		return;
@@ -229,6 +227,7 @@ xfs_trans_mod_dquot(
 	xfs_dqtrx_t	*qtrx;
 
 	ASSERT(tp);
+	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
 	qtrx = NULL;
 
 	if (tp->t_dqinfo == NULL)
@@ -346,7 +345,7 @@ xfs_trans_dqlockedjoin(
  * Unreserve just the reservations done by this transaction.
  * dquot is still left locked at exit.
  */
-STATIC void
+void
 xfs_trans_apply_dquot_deltas(
 	xfs_trans_t		*tp)
 {
@@ -357,7 +356,7 @@ xfs_trans_apply_dquot_deltas(
 	long			totalbdelta;
 	long			totalrtbdelta;
 
-	if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
 		return;
 
 	ASSERT(tp->t_dqinfo);
@@ -531,7 +530,7 @@ xfs_trans_apply_dquot_deltas(
  * we simply throw those away, since that's the expected behavior
  * when a transaction is curtailed without a commit.
  */
-STATIC void
+void
 xfs_trans_unreserve_and_mod_dquots(
 	xfs_trans_t		*tp)
 {
@@ -768,7 +767,7 @@ xfs_trans_reserve_quota_bydquots(
 {
 	int		resvd = 0, error;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
 	if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +810,17 @@ xfs_trans_reserve_quota_bydquots(
  * This doesn't change the actual usage, just the reservation.
  * The inode sent in is locked.
  */
-STATIC int
+int
 xfs_trans_reserve_quota_nblks(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	long		nblks,
-	long		ninos,
-	uint		flags)
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	long			nblks,
+	long			ninos,
+	uint			flags)
 {
-	int		error;
+	struct xfs_mount	*mp = ip->i_mount;
 
-	if (!XFS_IS_QUOTA_ON(mp))
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 	if (XFS_IS_PQUOTA_ON(mp))
 		flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +829,6 @@ xfs_trans_reserve_quota_nblks(
 	ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
 	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
 				XFS_TRANS_DQ_RES_RTBLKS ||
 	       (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +837,9 @@ xfs_trans_reserve_quota_nblks(
 	/*
 	 * Reserve nblks against these dquots, with trans as the mediator.
 	 */
-	error = xfs_trans_reserve_quota_bydquots(tp, mp,
-						 ip->i_udquot, ip->i_gdquot,
-						 nblks, ninos,
-						 flags);
-	return error;
+	return xfs_trans_reserve_quota_bydquots(tp, mp,
+						ip->i_udquot, ip->i_gdquot,
+						nblks, ninos, flags);
 }
 
 /*
@@ -895,25 +890,15 @@ STATIC void
 xfs_trans_alloc_dqinfo(
 	xfs_trans_t	*tp)
 {
-	(tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
 }
 
-STATIC void
+void
 xfs_trans_free_dqinfo(
 	xfs_trans_t	*tp)
 {
 	if (!tp->t_dqinfo)
 		return;
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
-	(tp)->t_dqinfo = NULL;
+	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+	tp->t_dqinfo = NULL;
 }
-
-xfs_dqtrxops_t	xfs_trans_dquot_ops = {
-	.qo_dup_dqinfo			= xfs_trans_dup_dqinfo,
-	.qo_free_dqinfo			= xfs_trans_free_dqinfo,
-	.qo_mod_dquot_byino		= xfs_trans_mod_dquot_byino,
-	.qo_apply_dquot_deltas		= xfs_trans_apply_dquot_deltas,
-	.qo_reserve_quota_nblks		= xfs_trans_reserve_quota_nblks,
-	.qo_reserve_quota_bydquots	= xfs_trans_reserve_quota_bydquots,
-	.qo_unreserve_and_mod_dquots	= xfs_trans_unreserve_and_mod_dquots,
-};
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde1654b430..cd1008b1c4cd 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -249,8 +249,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
-		return (error);
+	error = xfs_qm_dqattach(dp, 0);
+	if (error)
+		return error;
 
 	/*
 	 * If the inode doesn't have an attribute fork, add one.
@@ -311,7 +312,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
 	}
 	xfs_ilock(dp, XFS_ILOCK_EXCL);
 
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
 				       XFS_QMOPT_RES_REGBLKS);
 	if (error) {
@@ -501,8 +502,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 	/*
 	 * Attach the dquots to the inode.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
-		return (error);
+	error = xfs_qm_dqattach(dp, 0);
+	if (error)
+		return error;
 
 	/*
 	 * Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c6005a487..4b0f6efb046c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
 					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
 	} else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
 					XFS_TRANS_DQ_BCOUNT,
 			(long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
 		return error;
 	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
 	ip->i_d.di_nblocks--;
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, cbp);
 	if (cur->bc_bufs[0] == cbp)
 		cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
 	 * Adjust quota data.
 	 */
 	if (qfield)
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+		xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
 	/*
 	 * Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
 	ip->i_d.di_nblocks++;
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
 	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
 	/*
 	 * Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
 		XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
 		ip->i_d.di_nblocks = 1;
-		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+		xfs_trans_mod_dquot_byino(tp, ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 		flags |= xfs_ilog_fext(whichfork);
 	} else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
 			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
 		goto error0;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
 			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
 			XFS_QMOPT_RES_REGBLKS);
 	if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
 				 * adjusted later.  We return if we haven't
 				 * allocated blocks already inside this loop.
 				 */
-				if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
-						mp, NULL, ip, (long)alen, 0,
+				error = xfs_trans_reserve_quota_nblks(
+						NULL, ip, (long)alen, 0,
 						rt ? XFS_QMOPT_RES_RTBLKS :
-						     XFS_QMOPT_RES_REGBLKS))) {
+						     XFS_QMOPT_RES_REGBLKS);
+				if (error) {
 					if (n == 0) {
 						*nmap = 0;
 						ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
 					if (XFS_IS_QUOTA_ON(mp))
 						/* unreserve the blocks now */
 						(void)
-						XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
-							mp, NULL, ip,
+						xfs_trans_unreserve_quota_nblks(
+							NULL, ip,
 							(long)alen, 0, rt ?
 							XFS_QMOPT_RES_RTBLKS :
 							XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
 				do_div(rtexts, mp->m_sb.sb_rextsize);
 				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
 						(int64_t)rtexts, rsvd);
-				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-					NULL, ip, -((long)del.br_blockcount), 0,
+				(void)xfs_trans_reserve_quota_nblks(NULL,
+					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_RTBLKS);
 			} else {
 				xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
 						(int64_t)del.br_blockcount, rsvd);
-				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
-					NULL, ip, -((long)del.br_blockcount), 0,
+				(void)xfs_trans_reserve_quota_nblks(NULL,
+					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
 			}
 			ip->i_delayed_blks -= del.br_blockcount;
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d352586f..5c1ade06578e 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
 	cur->bc_private.b.allocated++;
 	cur->bc_private.b.ip->i_d.di_nblocks++;
 	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
-	XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+	xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
 			XFS_TRANS_DQ_BCOUNT, 1L);
 
 	new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
 	ip->i_d.di_nblocks--;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
 	xfs_trans_binval(tp, bp);
 	return 0;
 }
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81eedce6a..73e1c0d767ac 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -500,10 +500,7 @@ xfs_ireclaim(
 	 * ilock one but will still hold the iolock.
 	 */
 	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	/*
-	 * Release dquots (and their references) if any.
-	 */
-	XFS_QM_DQDETACH(ip->i_mount, ip);
+	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 
 	switch (ip->i_d.di_mode & S_IFMT) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7ec155..feb30a92549b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -385,7 +385,7 @@ xfs_iomap_write_direct(
 	 * Make sure that the dquots are there. This doesn't hold
 	 * the ilock across a disk read.
 	 */
-	error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+	error = xfs_qm_dqattach_locked(ip, 0);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -444,8 +444,7 @@ xfs_iomap_write_direct(
 	if (error)
 		goto error_out;
 
-	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-					      qblocks, 0, quota_flag);
+	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 	if (error)
 		goto error1;
 
@@ -495,7 +494,7 @@ xfs_iomap_write_direct(
 
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +581,7 @@ xfs_iomap_write_delay(
 	 * Make sure that the dquots are there. This doesn't hold
 	 * the ilock across a disk read.
 	 */
-	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+	error = xfs_qm_dqattach_locked(ip, 0);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -684,7 +683,8 @@ xfs_iomap_write_allocate(
 	/*
 	 * Make sure that the dquots are there.
 	 */
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return XFS_ERROR(error);
 
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a99725d0cc..b659db5e7165 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -959,6 +959,53 @@ xfs_check_sizes(xfs_mount_t *mp)
 	return 0;
 }
 
+/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(
+	struct xfs_mount	*mp)
+{
+	int			error;
+	struct xfs_trans	*tp;
+
+	mp->m_qflags = 0;
+
+	/*
+	 * It is OK to look at sb_qflags here in mount path,
+	 * without m_sb_lock.
+	 */
+	if (mp->m_sb.sb_qflags == 0)
+		return 0;
+	spin_lock(&mp->m_sb_lock);
+	mp->m_sb.sb_qflags = 0;
+	spin_unlock(&mp->m_sb_lock);
+
+	/*
+	 * If the fs is readonly, let the incore superblock run
+	 * with quotas off but don't flush the update out to disk
+	 */
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return 0;
+
+#ifdef QUOTADEBUG
+	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+				      XFS_DEFAULT_LOG_COUNT);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		xfs_fs_cmn_err(CE_ALERT, mp,
+			"xfs_mount_reset_sbqflags: Superblock update failed!");
+		return error;
+	}
+
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+	return xfs_trans_commit(tp, 0);
+}
+
 /*
  * This function does the following on an initial mount of a file system:
  *	- reads the superblock from disk and init the mount struct
@@ -976,7 +1023,8 @@ xfs_mountfs(
 	xfs_sb_t	*sbp = &(mp->m_sb);
 	xfs_inode_t	*rip;
 	__uint64_t	resblks;
-	uint		quotamount, quotaflags;
+	uint		quotamount = 0;
+	uint		quotaflags = 0;
 	int		error = 0;
 
 	xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
 	 */
-	error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
-	if (error)
-		goto out_rtunmount;
+	if (XFS_IS_QUOTA_RUNNING(mp)) {
+		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
+		if (error)
+			goto out_rtunmount;
+	} else {
+		ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+		/*
+		 * If a file system had quotas running earlier, but decided to
+		 * mount without -o uquota/pquota/gquota options, revoke the
+		 * quotachecked license.
+		 */
+		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+			cmn_err(CE_NOTE,
+				"XFS: resetting qflags for filesystem %s",
+				mp->m_fsname);
+
+			error = xfs_mount_reset_sbqflags(mp);
+			if (error)
+				return error;
+		}
+	}
 
 	/*
 	 * Finish recovering the file system.  This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
-	error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
-	if (error)
-		goto out_rtunmount;
+	if (quotamount) {
+		ASSERT(mp->m_qflags == 0);
+		mp->m_qflags = quotaflags;
+
+		xfs_qm_mount_quotas(mp);
+	}
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+	if (XFS_IS_QUOTA_ON(mp))
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+	else
+		xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
 
 	/*
 	 * Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
 	__uint64_t		resblks;
 	int			error;
 
-	/*
-	 * Release dquot that rootinode, rbmino and rsumino might be holding,
-	 * and release the quota inodes.
-	 */
-	XFS_QM_UNMOUNT(mp);
-
+	xfs_qm_unmount_quotas(mp);
 	xfs_rtunmount_inodes(mp);
 	IRELE(mp->m_rootip);
 
@@ -1301,10 +1373,7 @@ xfs_unmountfs(
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
 	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
 
-	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
-
-	if (mp->m_quotainfo)
-		XFS_QM_DONE(mp);
+	xfs_qm_unmount(mp);
 
 	/*
 	 * Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a64392f983..a5122382afde 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
 struct xfs_mru_cache;
 struct xfs_nameops;
 struct xfs_ail;
+struct xfs_quotainfo;
+
 
 /*
  * Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
 	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
 
 
-/*
- * Prototypes and functions for the Quota Management subsystem.
- */
-
-struct xfs_dquot;
-struct xfs_dqtrxops;
-struct xfs_quotainfo;
-
-typedef int	(*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int	(*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
-typedef void	(*xfs_qmunmount_t)(struct xfs_mount *);
-typedef void	(*xfs_qmdone_t)(struct xfs_mount *);
-typedef void	(*xfs_dqrele_t)(struct xfs_dquot *);
-typedef int	(*xfs_dqattach_t)(struct xfs_inode *, uint);
-typedef void	(*xfs_dqdetach_t)(struct xfs_inode *);
-typedef int	(*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
-typedef int	(*xfs_dqvopalloc_t)(struct xfs_mount *,
-			struct xfs_inode *, uid_t, gid_t, prid_t, uint,
-			struct xfs_dquot **, struct xfs_dquot **);
-typedef void	(*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot *, struct xfs_dquot *);
-typedef int	(*xfs_dqvoprename_t)(struct xfs_inode **);
-typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
-			struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot **, struct xfs_dquot *);
-typedef int	(*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
-			struct xfs_dquot *, struct xfs_dquot *, uint);
-typedef void	(*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
-typedef int	(*xfs_dqsync_t)(struct xfs_mount *, int flags);
-
-typedef struct xfs_qmops {
-	xfs_qminit_t		xfs_qminit;
-	xfs_qmdone_t		xfs_qmdone;
-	xfs_qmmount_t		xfs_qmmount;
-	xfs_qmunmount_t		xfs_qmunmount;
-	xfs_dqrele_t		xfs_dqrele;
-	xfs_dqattach_t		xfs_dqattach;
-	xfs_dqdetach_t		xfs_dqdetach;
-	xfs_dqpurgeall_t	xfs_dqpurgeall;
-	xfs_dqvopalloc_t	xfs_dqvopalloc;
-	xfs_dqvopcreate_t	xfs_dqvopcreate;
-	xfs_dqvoprename_t	xfs_dqvoprename;
-	xfs_dqvopchown_t	xfs_dqvopchown;
-	xfs_dqvopchownresv_t	xfs_dqvopchownresv;
-	xfs_dqstatvfs_t		xfs_dqstatvfs;
-	xfs_dqsync_t		xfs_dqsync;
-	struct xfs_dqtrxops	*xfs_dqtrxops;
-} xfs_qmops_t;
-
-#define XFS_QM_INIT(mp, mnt, fl) \
-	(*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl) \
-	(*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
-#define XFS_QM_UNMOUNT(mp) \
-	(*(mp)->m_qm_ops->xfs_qmunmount)(mp)
-#define XFS_QM_DONE(mp) \
-	(*(mp)->m_qm_ops->xfs_qmdone)(mp)
-#define XFS_QM_DQRELE(mp, dq) \
-	(*(mp)->m_qm_ops->xfs_dqrele)(dq)
-#define XFS_QM_DQATTACH(mp, ip, fl) \
-	(*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
-#define XFS_QM_DQDETACH(mp, ip) \
-	(*(mp)->m_qm_ops->xfs_dqdetach)(ip)
-#define XFS_QM_DQPURGEALL(mp, fl) \
-	(*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
-	(*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
-#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
-	(*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
-#define XFS_QM_DQVOPRENAME(mp, ip) \
-	(*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
-#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
-	(*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
-#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
-	(*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
-#define XFS_QM_DQSTATVFS(ip, statp) \
-	(*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
-#define XFS_QM_DQSYNC(mp, flags) \
-	(*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-
 #ifdef HAVE_PERCPU_SB
 
 /*
@@ -510,8 +432,6 @@ extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
 
 extern int	xfs_dmops_get(struct xfs_mount *);
 extern void	xfs_dmops_put(struct xfs_mount *);
-extern int	xfs_qmops_get(struct xfs_mount *);
-extern void	xfs_qmops_put(struct xfs_mount *);
 
 extern struct xfs_dmops xfs_dmcore_xfs;
 
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790ea8e7..000000000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_error.h"
-
-
-STATIC struct xfs_dquot *
-xfs_dqvopchown_default(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*ip,
-	struct xfs_dquot	**dqp,
-	struct xfs_dquot	*dq)
-{
-	return NULL;
-}
-
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-int
-xfs_mount_reset_sbqflags(xfs_mount_t *mp)
-{
-	int			error;
-	xfs_trans_t		*tp;
-
-	mp->m_qflags = 0;
-	/*
-	 * It is OK to look at sb_qflags here in mount path,
-	 * without m_sb_lock.
-	 */
-	if (mp->m_sb.sb_qflags == 0)
-		return 0;
-	spin_lock(&mp->m_sb_lock);
-	mp->m_sb.sb_qflags = 0;
-	spin_unlock(&mp->m_sb_lock);
-
-	/*
-	 * if the fs is readonly, let the incore superblock run
-	 * with quotas off but don't flush the update out to disk
-	 */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
-#ifdef QUOTADEBUG
-	xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		xfs_fs_cmn_err(CE_ALERT, mp,
-			"xfs_mount_reset_sbqflags: Superblock update failed!");
-		return error;
-	}
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-	error = xfs_trans_commit(tp, 0);
-	return error;
-}
-
-STATIC int
-xfs_noquota_init(
-	xfs_mount_t	*mp,
-	uint		*needquotamount,
-	uint		*quotaflags)
-{
-	int		error = 0;
-
-	*quotaflags = 0;
-	*needquotamount = B_FALSE;
-
-	ASSERT(!XFS_IS_QUOTA_ON(mp));
-
-	/*
-	 * If a file system had quotas running earlier, but decided to
-	 * mount without -o uquota/pquota/gquota options, revoke the
-	 * quotachecked license.
-	 */
-	if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
-		cmn_err(CE_NOTE,
-                        "XFS resetting qflags for filesystem %s",
-                        mp->m_fsname);
-
-		error = xfs_mount_reset_sbqflags(mp);
-	}
-	return error;
-}
-
-static struct xfs_qmops xfs_qmcore_stub = {
-	.xfs_qminit		= (xfs_qminit_t) xfs_noquota_init,
-	.xfs_qmdone		= (xfs_qmdone_t) fs_noerr,
-	.xfs_qmmount		= (xfs_qmmount_t) fs_noerr,
-	.xfs_qmunmount		= (xfs_qmunmount_t) fs_noerr,
-	.xfs_dqrele		= (xfs_dqrele_t) fs_noerr,
-	.xfs_dqattach		= (xfs_dqattach_t) fs_noerr,
-	.xfs_dqdetach		= (xfs_dqdetach_t) fs_noerr,
-	.xfs_dqpurgeall		= (xfs_dqpurgeall_t) fs_noerr,
-	.xfs_dqvopalloc		= (xfs_dqvopalloc_t) fs_noerr,
-	.xfs_dqvopcreate	= (xfs_dqvopcreate_t) fs_noerr,
-	.xfs_dqvoprename	= (xfs_dqvoprename_t) fs_noerr,
-	.xfs_dqvopchown		= xfs_dqvopchown_default,
-	.xfs_dqvopchownresv	= (xfs_dqvopchownresv_t) fs_noerr,
-	.xfs_dqstatvfs		= (xfs_dqstatvfs_t) fs_noval,
-	.xfs_dqsync		= (xfs_dqsync_t) fs_noerr,
-};
-
-int
-xfs_qmops_get(struct xfs_mount *mp)
-{
-	if (XFS_IS_QUOTA_RUNNING(mp)) {
-#ifdef CONFIG_XFS_QUOTA
-		mp->m_qm_ops = &xfs_qmcore_xfs;
-#else
-		cmn_err(CE_WARN,
-			"XFS: qouta support not available in this kernel.");
-		return EINVAL;
-#endif
-	} else {
-		mp->m_qm_ops = &xfs_qmcore_stub;
-	}
-
-	return 0;
-}
-
-void
-xfs_qmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202dde25..079d2e61c626 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_UMOUNTING	0x0000100 /* filesys is being unmounted */
 #define XFS_QMOPT_DOLOG		0x0000200 /* log buf changes (in quotacheck) */
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_ILOCKED	0x0000800 /* inode is already locked (excl) */
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,72 @@ typedef struct xfs_dqtrx {
 	long		qt_delrtb_delta;  /* delayed RT blk count changes */
 } xfs_dqtrx_t;
 
-/*
- * Dquot transaction functions, used if quota is enabled.
- */
-typedef void	(*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
-typedef void	(*qo_mod_dquot_byino_t)(struct xfs_trans *,
-				struct xfs_inode *, uint, long);
-typedef void	(*qo_free_dqinfo_t)(struct xfs_trans *);
-typedef void	(*qo_apply_dquot_deltas_t)(struct xfs_trans *);
-typedef void	(*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
-typedef int	(*qo_reserve_quota_nblks_t)(
-				struct xfs_trans *, struct xfs_mount *,
-				struct xfs_inode *, long, long, uint);
-typedef int	(*qo_reserve_quota_bydquots_t)(
-				struct xfs_trans *, struct xfs_mount *,
-				struct xfs_dquot *, struct xfs_dquot *,
-				long, long, uint);
-typedef struct xfs_dqtrxops {
-	qo_dup_dqinfo_t			qo_dup_dqinfo;
-	qo_free_dqinfo_t		qo_free_dqinfo;
-	qo_mod_dquot_byino_t		qo_mod_dquot_byino;
-	qo_apply_dquot_deltas_t		qo_apply_dquot_deltas;
-	qo_reserve_quota_nblks_t	qo_reserve_quota_nblks;
-	qo_reserve_quota_bydquots_t	qo_reserve_quota_bydquots;
-	qo_unreserve_and_mod_dquots_t	qo_unreserve_and_mod_dquots;
-} xfs_dqtrxops_t;
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
+extern void xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
+		uint, long);
+extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+		struct xfs_inode *, long, long, uint);
+extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+		struct xfs_mount *, struct xfs_dquot *,
+		struct xfs_dquot *, long, long, uint);
 
-#define XFS_DQTRXOP(mp, tp, op, args...) \
-		((mp)->m_qm_ops->xfs_dqtrxops ? \
-		((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0)
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
+		struct xfs_dquot **, struct xfs_dquot **);
+extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
+		struct xfs_dquot *, struct xfs_dquot *);
+extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
+		struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
+extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
+		struct xfs_dquot *, struct xfs_dquot *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern void xfs_qm_dqdetach(struct xfs_inode *);
+extern void xfs_qm_dqrele(struct xfs_dquot *);
+extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
+extern int xfs_qm_sync(struct xfs_mount *, int);
+extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+extern void xfs_qm_mount_quotas(struct xfs_mount *);
+extern void xfs_qm_unmount(struct xfs_mount *);
+extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 
-#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
-		((mp)->m_qm_ops->xfs_dqtrxops ? \
-		((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)
+#else
+#define xfs_trans_dup_dqinfo(tp, tp2)
+#define xfs_trans_free_dqinfo(tp)
+#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
+#define xfs_trans_apply_dquot_deltas(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)	(0)
+#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)	(0)
+#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, fl, ou, og)		(0)
+#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
+#define xfs_qm_vop_rename_dqattach(it)					(0)
+#define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)
+#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl)			(0)
+#define xfs_qm_dqattach(ip, fl)						(0)
+#define xfs_qm_dqattach_locked(ip, fl)					(0)
+#define xfs_qm_dqdetach(ip)
+#define xfs_qm_dqrele(d)
+#define xfs_qm_statvfs(ip, s)
+#define xfs_qm_sync(mp, fl)						(0)
+#define xfs_qm_newmount(mp, a, b)					(0)
+#define xfs_qm_mount_quotas(mp)
+#define xfs_qm_unmount(mp)
+#define xfs_qm_unmount_quotas(mp)					(0)
+#endif /* CONFIG_XFS_QUOTA */
 
-#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
-	XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp)
-#define XFS_TRANS_FREE_DQINFO(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo)
-#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta)
-#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas)
-#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
-	XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
-#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
-	XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
-#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
-	XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
-
-#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
-	XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
-#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
-	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
-				f | XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
-	XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
+	xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
+#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
+	xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
 				f | XFS_QMOPT_RES_REGBLKS)
 
 extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
 extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
-extern struct xfs_qmops xfs_qmcore_xfs;
-
 #endif	/* __KERNEL__ */
-
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9cd11d..b81deea0ce19 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
 	/*
 	 * Attach the dquots to the inodes
 	 */
-	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+	error = xfs_qm_vop_rename_dqattach(inodes);
+	if (error) {
 		xfs_trans_cancel(tp, cancel_flags);
 		goto std_return;
 	}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8570b826fedd..fffabc0aefcb 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
 
-	XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
+	xfs_trans_dup_dqinfo(tp, ntp);
 
 	atomic_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
@@ -831,7 +831,7 @@ shut_us_down:
 		 * means is that we have some (non-persistent) quota
 		 * reservations that need to be unreserved.
 		 */
-		XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+		xfs_trans_unreserve_and_mod_dquots(tp);
 		if (tp->t_ticket) {
 			commit_lsn = xfs_log_done(mp, tp->t_ticket,
 							NULL, log_flags);
@@ -850,10 +850,9 @@ shut_us_down:
 	/*
 	 * If we need to update the superblock, then do it now.
 	 */
-	if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
 		xfs_trans_apply_sb_deltas(tp);
-	}
-	XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
+	xfs_trans_apply_dquot_deltas(tp);
 
 	/*
 	 * Ask each log item how many log_vector entries it will
@@ -1058,7 +1057,7 @@ xfs_trans_uncommit(
 	}
 
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+	xfs_trans_unreserve_and_mod_dquots(tp);
 
 	xfs_trans_free_items(tp, flags);
 	xfs_trans_free_busy(tp);
@@ -1183,7 +1182,7 @@ xfs_trans_cancel(
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+	xfs_trans_unreserve_and_mod_dquots(tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1213,7 +1212,7 @@ xfs_trans_free(
 	xfs_trans_t	*tp)
 {
 	atomic_dec(&tp->t_mountp->m_active_trans);
-	XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+	xfs_trans_free_dqinfo(tp);
 	kmem_zone_free(xfs_trans_zone, tp);
 }
 
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5ea5359..4d88616bde91 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
 			xfs_buf_relse(ialloc_context);
 			if (dqinfo) {
 				tp->t_dqinfo = dqinfo;
-				XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+				xfs_trans_free_dqinfo(tp);
 			}
 			*tpp = ntp;
 			*ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a9c762..b56321b2b9f0 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -118,7 +118,7 @@ xfs_setattr(
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+		code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
 					 qflags, &udqp, &gdqp);
 		if (code)
 			return code;
@@ -180,10 +180,11 @@ xfs_setattr(
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
-		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
 			ASSERT(tp);
-			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 						capable(CAP_FOWNER) ?
 						XFS_QMOPT_FORCE_RES : 0);
 			if (code)	/* out of quota */
@@ -217,7 +218,7 @@ xfs_setattr(
 		/*
 		 * Make sure that the dquots are attached to the inode.
 		 */
-		code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+		code = xfs_qm_dqattach_locked(ip, 0);
 		if (code)
 			goto error_return;
 
@@ -351,21 +352,21 @@ xfs_setattr(
 		 * in the transaction.
 		 */
 		if (iuid != uid) {
-			if (XFS_IS_UQUOTA_ON(mp)) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
 				ASSERT(mask & ATTR_UID);
 				ASSERT(udqp);
-				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
 			ip->i_d.di_uid = uid;
 			inode->i_uid = uid;
 		}
 		if (igid != gid) {
-			if (XFS_IS_GQUOTA_ON(mp)) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & ATTR_GID);
 				ASSERT(gdqp);
-				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
 			ip->i_d.di_gid = gid;
@@ -461,10 +462,10 @@ xfs_setattr(
 	/*
 	 * Release any dquot(s) the inode had kept before chown.
 	 */
-	XFS_QM_DQRELE(mp, olddquot1);
-	XFS_QM_DQRELE(mp, olddquot2);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(olddquot1);
+	xfs_qm_dqrele(olddquot2);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (code) {
 		return code;
@@ -482,8 +483,8 @@ xfs_setattr(
 	commit_flags |= XFS_TRANS_ABORT;
 	/* FALLTHROUGH */
  error_return:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 	if (tp) {
 		xfs_trans_cancel(tp, commit_flags);
 	}
@@ -739,7 +740,8 @@ xfs_free_eofblocks(
 		/*
 		 * Attach the dquots to the inode up front.
 		 */
-		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+		error = xfs_qm_dqattach(ip, 0);
+		if (error)
 			return error;
 
 		/*
@@ -1181,7 +1183,8 @@ xfs_inactive(
 
 	ASSERT(ip->i_d.di_nlink == 0);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return VN_INACTIVE_CACHE;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1310,7 @@ xfs_inactive(
 		/*
 		 * Credit the quota account(s). The inode is gone.
 		 */
-		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
 		/*
 		 * Just ignore errors at this point.  There is nothing we can
@@ -1323,11 +1326,11 @@ xfs_inactive(
 			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
 				"xfs_trans_commit() returned error %d", error);
 	}
+
 	/*
 	 * Release the dquots held by inode, if any.
 	 */
-	XFS_QM_DQDETACH(mp, ip);
-
+	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
  out:
@@ -1427,8 +1430,7 @@ xfs_create(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(), current_fsgid(), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -1489,7 +1491,7 @@ xfs_create(
 	/*
 	 * Reserve disk quota and the inode.
 	 */
-	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1561,7 +1563,7 @@ xfs_create(
 	 * These ids of the inode couldn't have changed since the new
 	 * inode has been locked ever since it was created.
 	 */
-	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
 
 	/*
 	 * xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1582,8 @@ xfs_create(
 		goto out_dqrele;
 	}
 
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	*ipp = ip;
 
@@ -1602,8 +1604,8 @@ xfs_create(
  out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
  out_dqrele:
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1839,11 @@ xfs_remove(
 			return error;
 	}
 
-	error = XFS_QM_DQATTACH(mp, dp, 0);
+	error = xfs_qm_dqattach(dp, 0);
 	if (error)
 		goto std_return;
 
-	error = XFS_QM_DQATTACH(mp, ip, 0);
+	error = xfs_qm_dqattach(ip, 0);
 	if (error)
 		goto std_return;
 
@@ -2028,11 +2030,11 @@ xfs_link(
 
 	/* Return through std_return after this point. */
 
-	error = XFS_QM_DQATTACH(mp, sip, 0);
+	error = xfs_qm_dqattach(sip, 0);
 	if (error)
 		goto std_return;
 
-	error = XFS_QM_DQATTACH(mp, tdp, 0);
+	error = xfs_qm_dqattach(tdp, 0);
 	if (error)
 		goto std_return;
 
@@ -2205,8 +2207,7 @@ xfs_symlink(
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = XFS_QM_DQVOPALLOC(mp, dp,
-			current_fsuid(), current_fsgid(), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
 	if (error)
 		goto std_return;
@@ -2248,7 +2249,7 @@ xfs_symlink(
 	/*
 	 * Reserve disk quota : blocks and inode.
 	 */
-	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
 	if (error)
 		goto error_return;
 
@@ -2288,7 +2289,7 @@ xfs_symlink(
 	/*
 	 * Also attach the dquot(s) to it, if applicable.
 	 */
-	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
 
 	if (resblks)
 		resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2377,8 @@ xfs_symlink(
 		goto error2;
 	}
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	/* Fall through to std_return with error = 0 or errno from
 	 * xfs_trans_commit	*/
@@ -2401,8 +2402,8 @@ std_return:
 	cancel_flags |= XFS_TRANS_ABORT;
  error_return:
 	xfs_trans_cancel(tp, cancel_flags);
-	XFS_QM_DQRELE(mp, udqp);
-	XFS_QM_DQRELE(mp, gdqp);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2542,8 @@ xfs_alloc_file_space(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return error;
 
 	if (len <= 0)
@@ -2628,8 +2630,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
-						      qblocks, 0, quota_flag);
+		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+						      0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -2688,7 +2690,7 @@ dmapi_enospc_check:
 
 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
-	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 
 error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2829,8 @@ xfs_free_file_space(
 
 	xfs_itrace_entry(ip);
 
-	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
 		return error;
 
 	error = 0;
@@ -2953,9 +2956,9 @@ xfs_free_file_space(
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0,
-				XFS_QMOPT_RES_REGBLKS);
+		error = xfs_trans_reserve_quota(tp, mp,
+				ip->i_udquot, ip->i_gdquot,
+				resblks, 0, XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 

From 5a34d5cd096310133f9208db294021208a96660d Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:03 +0200
Subject: [PATCH 11/23] xfs: split inode data writeback from xfs_sync_inodes_ag

In many cases we only want to sync inode data. Start spliting the inode sync
into data sync and inode sync by factoring out the inode data flush.

[hch: minor cleanups]

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 52 +++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b06b95c154cc..7adc62dd14bb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -48,6 +48,35 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+
+STATIC int
+xfs_sync_inode_data(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	struct inode		*inode = VFS_I(ip);
+	struct address_space *mapping = inode->i_mapping;
+	int			error = 0;
+
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		goto out_wait;
+
+	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+		if (flags & SYNC_TRYLOCK)
+			goto out_wait;
+		xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	}
+
+	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+				0 : XFS_B_ASYNC, FI_NONE);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+	if (flags & SYNC_IOWAIT)
+		xfs_ioend_wait(ip);
+	return error;
+}
+
 /*
  * Sync all the inodes in the given AG according to the
  * direction given by the flags.
@@ -123,27 +152,10 @@ xfs_sync_inodes_ag(
 		 * If we have to flush data or wait for I/O completion
 		 * we need to hold the iolock.
 		 */
-		if (flags & SYNC_DELWRI) {
-			if (VN_DIRTY(inode)) {
-				if (flags & SYNC_TRYLOCK) {
-					if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
-						lock_flags |= XFS_IOLOCK_SHARED;
-				} else {
-					xfs_ilock(ip, XFS_IOLOCK_SHARED);
-					lock_flags |= XFS_IOLOCK_SHARED;
-				}
-				if (lock_flags & XFS_IOLOCK_SHARED) {
-					error = xfs_flush_pages(ip, 0, -1,
-							(flags & SYNC_WAIT) ? 0
-								: XFS_B_ASYNC,
-							FI_NONE);
-				}
-			}
-			if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
-				xfs_ioend_wait(ip);
-		}
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		if (flags & SYNC_DELWRI)
+			error = xfs_sync_inode_data(ip, flags);
 
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
 			if (flags & SYNC_WAIT) {
 				xfs_iflock(ip);

From 845b6d0cbbc2304e8a54ed4038272c55f85b2269 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:05 +0200
Subject: [PATCH 12/23] xfs: split inode flushing from xfs_sync_inodes_ag

In many cases we only want to sync inode metadata. Split out the inode
flushing into a separate helper to prepare factoring the inode sync code.

Based on a patch from Dave Chinner, but redone to keep the current behaviour
exactly and leave changes to the flushing logic to another patch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 50 ++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 7adc62dd14bb..1712caa12017 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -77,6 +77,35 @@ xfs_sync_inode_data(
 	return error;
 }
 
+STATIC int
+xfs_sync_inode_attr(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	int			error = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_inode_clean(ip))
+		goto out_unlock;
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(flags & SYNC_WAIT))
+			goto out_unlock;
+		xfs_iflock(ip);
+	}
+
+	if (xfs_inode_clean(ip)) {
+		xfs_ifunlock(ip);
+		goto out_unlock;
+	}
+
+	error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
+			   XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+
+ out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return error;
+}
+
 /*
  * Sync all the inodes in the given AG according to the
  * direction given by the flags.
@@ -96,7 +125,6 @@ xfs_sync_inodes_ag(
 	do {
 		struct inode	*inode;
 		xfs_inode_t	*ip = NULL;
-		int		lock_flags = XFS_ILOCK_SHARED;
 
 		/*
 		 * use a gang lookup to find the next inode in the tree
@@ -155,22 +183,10 @@ xfs_sync_inodes_ag(
 		if (flags & SYNC_DELWRI)
 			error = xfs_sync_inode_data(ip, flags);
 
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
-			if (flags & SYNC_WAIT) {
-				xfs_iflock(ip);
-				if (!xfs_inode_clean(ip))
-					error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
-				else
-					xfs_ifunlock(ip);
-			} else if (xfs_iflock_nowait(ip)) {
-				if (!xfs_inode_clean(ip))
-					error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
-				else
-					xfs_ifunlock(ip);
-			}
-		}
-		xfs_iput(ip, lock_flags);
+		if (flags & SYNC_ATTR)
+			error = xfs_sync_inode_attr(ip, flags);
+
+		IRELE(ip);
 
 		if (error)
 			last_error = error;

From 1da8eecab5f866b4f5be43adbaadf18e259a8cc5 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:07 +0200
Subject: [PATCH 13/23] xfs: factor out inode validation for sync

Separate the validation of inodes found by the radix
tree walk from the radix tree lookup.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 59 +++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 1712caa12017..c91d5b2a568e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -49,6 +49,39 @@
 #include <linux/freezer.h>
 
 
+/* must be called with pag_ici_lock held and releases it */
+STATIC int
+xfs_sync_inode_valid(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	/* nothing to sync during shutdown */
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		read_unlock(&pag->pag_ici_lock);
+		return EFSCORRUPTED;
+	}
+
+	/*
+	 * If we can't get a reference on the inode, it must be in reclaim.
+	 * Leave it for the reclaim code to flush. Also avoid inodes that
+	 * haven't been fully initialised.
+	 */
+	if (!igrab(inode)) {
+		read_unlock(&pag->pag_ici_lock);
+		return ENOENT;
+	}
+	read_unlock(&pag->pag_ici_lock);
+
+	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+		IRELE(ip);
+		return ENOENT;
+	}
+
+	return 0;
+}
+
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
@@ -123,7 +156,6 @@ xfs_sync_inodes_ag(
 	int		last_error = 0;
 
 	do {
-		struct inode	*inode;
 		xfs_inode_t	*ip = NULL;
 
 		/*
@@ -152,27 +184,10 @@ xfs_sync_inodes_ag(
 			break;
 		}
 
-		/* nothing to sync during shutdown */
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			read_unlock(&pag->pag_ici_lock);
-			return 0;
-		}
-
-		/*
-		 * If we can't get a reference on the inode, it must be
-		 * in reclaim. Leave it for the reclaim code to flush.
-		 */
-		inode = VFS_I(ip);
-		if (!igrab(inode)) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-		read_unlock(&pag->pag_ici_lock);
-
-		/* avoid new or bad inodes */
-		if (is_bad_inode(inode) ||
-		    xfs_iflags_test(ip, XFS_INEW)) {
-			IRELE(ip);
+		error = xfs_sync_inode_valid(ip, pag);
+		if (error) {
+			if (error == EFSCORRUPTED)
+				return 0;
 			continue;
 		}
 

From abc1064742604e60a47a65fa3214dc1a84db093d Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:12 +0200
Subject: [PATCH 14/23] xfs: remove unused parameter from xfs_reclaim_inodes

The noblock parameter of xfs_reclaim_inodes is only ever set to zero. Remove
it and all the conditional code that is never executed.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 22 ++++------------------
 fs/xfs/linux-2.6/xfs_sync.h |  2 +-
 fs/xfs/xfs_mount.c          |  2 +-
 3 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index c91d5b2a568e..7eb9d9cca52a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -385,7 +385,7 @@ xfs_quiesce_fs(
 	int	count = 0, pincount;
 
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+	xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 
 	/*
 	 * This loop must run at least twice.  The first instance of the loop
@@ -509,7 +509,7 @@ xfs_sync_worker(
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-		xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
 		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
@@ -703,7 +703,6 @@ STATIC void
 xfs_reclaim_inodes_ag(
 	xfs_mount_t	*mp,
 	int		ag,
-	int		noblock,
 	int		mode)
 {
 	xfs_inode_t	*ip = NULL;
@@ -749,25 +748,13 @@ restart:
 			continue;
 		}
 
-		if (noblock) {
-			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-				read_unlock(&pag->pag_ici_lock);
-				continue;
-			}
-			if (xfs_ipincount(ip) ||
-			    !xfs_iflock_nowait(ip)) {
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				read_unlock(&pag->pag_ici_lock);
-				continue;
-			}
-		}
 		read_unlock(&pag->pag_ici_lock);
 
 		/*
 		 * hmmm - this is an inode already in reclaim. Do
 		 * we even bother catching it here?
 		 */
-		if (xfs_reclaim_inode(ip, noblock, mode))
+		if (xfs_reclaim_inode(ip, 0, mode))
 			skipped++;
 	} while (nr_found);
 
@@ -782,7 +769,6 @@ restart:
 int
 xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
-	int		 noblock,
 	int		mode)
 {
 	int		i;
@@ -790,7 +776,7 @@ xfs_reclaim_inodes(
 	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
 		if (!mp->m_perag[i].pag_ici_init)
 			continue;
-		xfs_reclaim_inodes_ag(mp, i, noblock, mode);
+		xfs_reclaim_inodes_ag(mp, i, mode);
 	}
 	return 0;
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf6dfbd..d7b2b5f1c387 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,7 +48,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 void xfs_flush_inodes(struct xfs_inode *ip);
 
 int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b659db5e7165..5c6f092659c1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1371,7 +1371,7 @@ xfs_unmountfs(
 	 * need to force the log first.
 	 */
 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
-	xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
+	xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
 
 	xfs_qm_unmount(mp);
 

From 75f3cb1393133682958db6f157e1b6473e5a366b Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Mon, 8 Jun 2009 15:35:14 +0200
Subject: [PATCH 15/23] xfs: introduce a per-ag inode iterator

Given that we walk across the per-ag inode lists so often, it makes sense to
introduce an iterator for this.

Convert the sync and reclaim code to use this new iterator, quota code will
follow in the next patch.

Also change xfs_reclaim_inode to return -EGAIN instead of 1 for an inode
already under reclaim.  This simplifies the AG iterator and doesn't
matter for the only other caller.

[hch: merged the lookup and execute callbacks back into one to get the
 pag_ici_lock locking correct and simplify the code flow]

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 316 +++++++++++++++++-------------------
 fs/xfs/xfs_ag.h             |   2 +
 2 files changed, 152 insertions(+), 166 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 7eb9d9cca52a..9798643feb3b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -49,6 +49,123 @@
 #include <linux/freezer.h>
 
 
+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	uint32_t		*first_index,
+	int			tag)
+{
+	int			nr_found;
+	struct xfs_inode	*ip;
+
+	/*
+	 * use a gang lookup to find the next inode in the tree
+	 * as the tree is sparse and a gang lookup walks to find
+	 * the number of objects requested.
+	 */
+	read_lock(&pag->pag_ici_lock);
+	if (tag == XFS_ICI_NO_TAG) {
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1);
+	} else {
+		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+				(void **)&ip, *first_index, 1, tag);
+	}
+	if (!nr_found)
+		goto unlock;
+
+	/*
+	 * Update the index for the next lookup. Catch overflows
+	 * into the next AG range which can occur if we have inodes
+	 * in the last block of the AG and we are currently
+	 * pointing to the last inode.
+	 */
+	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+		goto unlock;
+
+	return ip;
+
+unlock:
+	read_unlock(&pag->pag_ici_lock);
+	return NULL;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		ag,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	struct xfs_perag	*pag = &mp->m_perag[ag];
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+
+restart:
+	skipped = 0;
+	first_index = 0;
+	do {
+		int		error = 0;
+		xfs_inode_t	*ip;
+
+		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+		if (!ip)
+			break;
+
+		error = execute(ip, pag, flags);
+		if (error == EAGAIN) {
+			skipped++;
+			continue;
+		}
+		if (error)
+			last_error = error;
+		/*
+		 * bail out if the filesystem is corrupted.
+		 */
+		if (error == EFSCORRUPTED)
+			break;
+
+	} while (1);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
+	}
+
+	xfs_put_perag(mp, pag);
+	return last_error;
+}
+
+STATIC int
+xfs_inode_ag_iterator(
+	struct xfs_mount	*mp,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags,
+	int			tag)
+{
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		if (!mp->m_perag[ag].pag_ici_init)
+			continue;
+		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		if (error) {
+			last_error = error;
+			if (error == EFSCORRUPTED)
+				break;
+		}
+	}
+	return XFS_ERROR(last_error);
+}
+
 /* must be called with pag_ici_lock held and releases it */
 STATIC int
 xfs_sync_inode_valid(
@@ -85,12 +202,17 @@ xfs_sync_inode_valid(
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	struct inode		*inode = VFS_I(ip);
 	struct address_space *mapping = inode->i_mapping;
 	int			error = 0;
 
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
+
 	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		goto out_wait;
 
@@ -107,16 +229,22 @@ xfs_sync_inode_data(
  out_wait:
 	if (flags & SYNC_IOWAIT)
 		xfs_ioend_wait(ip);
+	IRELE(ip);
 	return error;
 }
 
 STATIC int
 xfs_sync_inode_attr(
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	int			flags)
 {
 	int			error = 0;
 
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
+
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	if (xfs_inode_clean(ip))
 		goto out_unlock;
@@ -136,117 +264,33 @@ xfs_sync_inode_attr(
 
  out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	IRELE(ip);
 	return error;
 }
 
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		flags)
-{
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index = 0;
-	int		error = 0;
-	int		last_error = 0;
-
-	do {
-		xfs_inode_t	*ip = NULL;
-
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void**)&ip, first_index, 1);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		error = xfs_sync_inode_valid(ip, pag);
-		if (error) {
-			if (error == EFSCORRUPTED)
-				return 0;
-			continue;
-		}
-
-		/*
-		 * If we have to flush data or wait for I/O completion
-		 * we need to hold the iolock.
-		 */
-		if (flags & SYNC_DELWRI)
-			error = xfs_sync_inode_data(ip, flags);
-
-		if (flags & SYNC_ATTR)
-			error = xfs_sync_inode_attr(ip, flags);
-
-		IRELE(ip);
-
-		if (error)
-			last_error = error;
-		/*
-		 * bail out if the filesystem is corrupted.
-		 */
-		if (error == EFSCORRUPTED)
-			return XFS_ERROR(error);
-
-	} while (nr_found);
-
-	return last_error;
-}
-
 int
 xfs_sync_inodes(
 	xfs_mount_t	*mp,
 	int		flags)
 {
-	int		error;
-	int		last_error;
-	int		i;
+	int		error = 0;
 	int		lflags = XFS_LOG_FORCE;
 
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
-	error = 0;
-	last_error = 0;
 
 	if (flags & SYNC_WAIT)
 		lflags |= XFS_LOG_SYNC;
 
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		error = xfs_sync_inodes_ag(mp, i, flags);
-		if (error)
-			last_error = error;
-		if (error == EFSCORRUPTED)
-			break;
-	}
 	if (flags & SYNC_DELWRI)
-		xfs_log_force(mp, 0, lflags);
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, XFS_ICI_NO_TAG);
 
-	return XFS_ERROR(last_error);
+	if (flags & SYNC_ATTR)
+		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, XFS_ICI_NO_TAG);
+
+	if (!error && (flags & SYNC_DELWRI))
+		xfs_log_force(mp, 0, lflags);
+	return XFS_ERROR(error);
 }
 
 STATIC int
@@ -613,7 +657,7 @@ xfs_reclaim_inode(
 			xfs_ifunlock(ip);
 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		}
-		return 1;
+		return -EAGAIN;
 	}
 	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
@@ -698,72 +742,20 @@ xfs_inode_clear_reclaim_tag(
 	xfs_put_perag(mp, pag);
 }
 
-
-STATIC void
-xfs_reclaim_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	int		mode)
+STATIC int
+xfs_reclaim_inode_now(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
 {
-	xfs_inode_t	*ip = NULL;
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		nr_found;
-	uint32_t	first_index;
-	int		skipped;
-
-restart:
-	first_index = 0;
-	skipped = 0;
-	do {
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-					(void**)&ip, first_index, 1,
-					XFS_ICI_RECLAIM_TAG);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/* ignore if already under reclaim */
-		if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-
+	/* ignore if already under reclaim */
+	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 		read_unlock(&pag->pag_ici_lock);
-
-		/*
-		 * hmmm - this is an inode already in reclaim. Do
-		 * we even bother catching it here?
-		 */
-		if (xfs_reclaim_inode(ip, 0, mode))
-			skipped++;
-	} while (nr_found);
-
-	if (skipped) {
-		delay(1);
-		goto restart;
+		return 0;
 	}
-	return;
+	read_unlock(&pag->pag_ici_lock);
 
+	return xfs_reclaim_inode(ip, 0, flags);
 }
 
 int
@@ -771,14 +763,6 @@ xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	int		i;
-
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		xfs_reclaim_inodes_ag(mp, i, mode);
-	}
-	return 0;
+	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+					XFS_ICI_RECLAIM_TAG);
 }
-
-
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f713caa..f24b50b68d03 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
 /*
  * tags for inode radix tree
  */
+#define XFS_ICI_NO_TAG		(-1)	/* special flag for an untagged lookup
+					   in xfs_inode_ag_iterator */
 #define XFS_ICI_RECLAIM_TAG	0	/* inode is to be reclaimed */
 
 #define	XFS_AG_MAXLEVELS(mp)		((mp)->m_ag_maxlevels)

From fe588ed32867b42e0d906db558ca92fd9f8b128e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:27 +0200
Subject: [PATCH 16/23] xfs: use generic inode iterator in
 xfs_qm_dqrele_all_inodes

Use xfs_inode_ag_iterator instead of opencoding the inode walk in the
quota code.  Mark xfs_inode_ag_iterator and xfs_sync_inode_valid non-static
to allow using them from the quota code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c    |   4 +-
 fs/xfs/linux-2.6/xfs_sync.h    |   6 ++
 fs/xfs/quota/xfs_qm_syscalls.c | 112 +++++++++------------------------
 3 files changed, 39 insertions(+), 83 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 9798643feb3b..a3d2e7713068 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -141,7 +141,7 @@ restart:
 	return last_error;
 }
 
-STATIC int
+int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
 	int			(*execute)(struct xfs_inode *ip,
@@ -167,7 +167,7 @@ xfs_inode_ag_iterator(
 }
 
 /* must be called with pag_ici_lock held and releases it */
-STATIC int
+int
 xfs_sync_inode_valid(
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag)
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d7b2b5f1c387..9bb1253a4023 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,4 +54,10 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 				struct xfs_inode *ip);
+
+int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+	int flags, int tag);
+
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6506ce..7126f855e14b 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -847,105 +847,55 @@ xfs_qm_export_flags(
 }
 
 
-/*
- * Release all the dquots on the inodes in an AG.
- */
-STATIC void
-xfs_qm_dqrele_inodes_ag(
-	xfs_mount_t	*mp,
-	int		ag,
-	uint		flags)
+STATIC int
+xfs_dqrele_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
 {
-	xfs_inode_t	*ip = NULL;
-	xfs_perag_t	*pag = &mp->m_perag[ag];
-	int		first_index = 0;
-	int		nr_found;
+	int			error;
 
-	do {
-		/*
-		 * use a gang lookup to find the next inode in the tree
-		 * as the tree is sparse and a gang lookup walks to find
-		 * the number of objects requested.
-		 */
-		read_lock(&pag->pag_ici_lock);
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void**)&ip, first_index, 1);
-
-		if (!nr_found) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/*
-		 * Update the index for the next lookup. Catch overflows
-		 * into the next AG range which can occur if we have inodes
-		 * in the last block of the AG and we are currently
-		 * pointing to the last inode.
-		 */
-		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-		if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
-			read_unlock(&pag->pag_ici_lock);
-			break;
-		}
-
-		/* skip quota inodes */
-		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
-			ASSERT(ip->i_udquot == NULL);
-			ASSERT(ip->i_gdquot == NULL);
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
-
-		/*
-		 * If we can't get a reference on the inode, it must be
-		 * in reclaim. Leave it for the reclaim code to flush.
-		 */
-		if (!igrab(VFS_I(ip))) {
-			read_unlock(&pag->pag_ici_lock);
-			continue;
-		}
+	/* skip quota inodes */
+	if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+		ASSERT(ip->i_udquot == NULL);
+		ASSERT(ip->i_gdquot == NULL);
 		read_unlock(&pag->pag_ici_lock);
+		return 0;
+	}
 
-		/* avoid new inodes though we shouldn't find any here */
-		if (xfs_iflags_test(ip, XFS_INEW)) {
-			IRELE(ip);
-			continue;
-		}
+	error = xfs_sync_inode_valid(ip, pag);
+	if (error)
+		return error;
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-			xfs_qm_dqrele(ip->i_udquot);
-			ip->i_udquot = NULL;
-		}
-		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
-		    ip->i_gdquot) {
-			xfs_qm_dqrele(ip->i_gdquot);
-			ip->i_gdquot = NULL;
-		}
-		xfs_iput(ip, XFS_ILOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+		xfs_qm_dqrele(ip->i_udquot);
+		ip->i_udquot = NULL;
+	}
+	if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		xfs_qm_dqrele(ip->i_gdquot);
+		ip->i_gdquot = NULL;
+	}
+	xfs_iput(ip, XFS_ILOCK_EXCL);
+	IRELE(ip);
 
-	} while (nr_found);
+	return 0;
 }
 
+
 /*
  * Go thru all the inodes in the file system, releasing their dquots.
+ *
  * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * AFTER this, in the case of quotaoff.
  */
 void
 xfs_qm_dqrele_all_inodes(
 	struct xfs_mount *mp,
 	uint		 flags)
 {
-	int		i;
-
 	ASSERT(mp->m_quotainfo);
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		if (!mp->m_perag[i].pag_ici_init)
-			continue;
-		xfs_qm_dqrele_inodes_ag(mp, i, flags);
-	}
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
 }
 
 /*------------------------------------------------------------------------*/

From 075fe1028699f6a280545dfc2cfc5ac82d555c8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:35:48 +0200
Subject: [PATCH 17/23] xfs: split xfs_sync_inodes

xfs_sync_inodes is used to write back either file data or inode metadata.
In general we always do these separately, except for one fishy case in
xfs_fs_put_super that does both.  So separate xfs_sync_inodes into
separate xfs_sync_data and xfs_sync_attr functions.  In xfs_fs_put_super
we first call the data sync and then the attr sync as that was the previous
order.  The moved log force in that path doesn't make a difference because
we will force the log again as part of the real unmount process.

The filesystem readonly checks are not performed by the new function but
instead moved into the callers, given that most callers alredy have it
further up in the stack.  Also add debug checks that we do not pass in
incorrect flags in the new xfs_sync_data and xfs_sync_attr function and
fix the one place that did pass in a wrong flag.

Also remove a comment mentioning xfs_sync_inodes that has been incorrect
for a while because we always take either the iolock or ilock in the
sync path these days.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_quotaops.c |  4 ++-
 fs/xfs/linux-2.6/xfs_super.c    | 13 +++++++-
 fs/xfs/linux-2.6/xfs_sync.c     | 55 ++++++++++++++++++++-------------
 fs/xfs/linux-2.6/xfs_sync.h     |  5 ++-
 fs/xfs/xfs_filestream.c         |  6 ++--
 5 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a633d3d9..cb6e2cca214f 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
 	if (!XFS_IS_QUOTA_RUNNING(mp))
 		return -ENOSYS;
-	return -xfs_sync_inodes(mp, SYNC_DELWRI);
+	return -xfs_sync_data(mp, 0);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0d9b64b219e0..d4e7ef8f8df9 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1071,7 +1071,18 @@ xfs_fs_put_super(
 	int			unmount_event_flags = 0;
 
 	xfs_syncd_stop(mp);
-	xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		/*
+		 * XXX(hch): this should be SYNC_WAIT.
+		 *
+		 * Or more likely not needed at all because the VFS is already
+		 * calling ->sync_fs after shutting down all filestem
+		 * operations and just before calling ->put_super.
+		 */
+		xfs_sync_data(mp, 0);
+		xfs_sync_attr(mp, 0);
+	}
 
 #ifdef HAVE_DMAPI
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a3d2e7713068..c1a9a1135073 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -268,29 +268,42 @@ xfs_sync_inode_attr(
 	return error;
 }
 
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
 int
-xfs_sync_inodes(
-	xfs_mount_t	*mp,
-	int		flags)
+xfs_sync_data(
+	struct xfs_mount	*mp,
+	int			flags)
 {
-	int		error = 0;
-	int		lflags = XFS_LOG_FORCE;
+	int			error;
 
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
+	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT|SYNC_IOWAIT)) == 0);
 
-	if (flags & SYNC_WAIT)
-		lflags |= XFS_LOG_SYNC;
+	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+				      XFS_ICI_NO_TAG);
+	if (error)
+		return XFS_ERROR(error);
 
-	if (flags & SYNC_DELWRI)
-		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, XFS_ICI_NO_TAG);
+	xfs_log_force(mp, 0,
+		      (flags & SYNC_WAIT) ?
+		       XFS_LOG_FORCE | XFS_LOG_SYNC :
+		       XFS_LOG_FORCE);
+	return 0;
+}
 
-	if (flags & SYNC_ATTR)
-		error = xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, XFS_ICI_NO_TAG);
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+int
+xfs_sync_attr(
+	struct xfs_mount	*mp,
+	int			flags)
+{
+	ASSERT((flags & ~SYNC_WAIT) == 0);
 
-	if (!error && (flags & SYNC_DELWRI))
-		xfs_log_force(mp, 0, lflags);
-	return XFS_ERROR(error);
+	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+				     XFS_ICI_NO_TAG);
 }
 
 STATIC int
@@ -404,12 +417,12 @@ xfs_quiesce_data(
 	int error;
 
 	/* push non-blocking */
-	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
+	xfs_sync_data(mp, 0);
 	xfs_qm_sync(mp, SYNC_BDFLUSH);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
-	xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_WAIT|SYNC_IOWAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
@@ -438,7 +451,7 @@ xfs_quiesce_fs(
 	 * logged before we can write the unmount record.
 	 */
 	do {
-		xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+		xfs_sync_attr(mp, SYNC_WAIT);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
 			delay(50);
@@ -521,8 +534,8 @@ xfs_flush_inodes_work(
 	void		*arg)
 {
 	struct inode	*inode = arg;
-	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
-	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_IOWAIT);
 	iput(inode);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 9bb1253a4023..26bfb5c42e76 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,8 +29,6 @@ typedef struct xfs_sync_work {
 	struct completion	*w_completion;
 } xfs_sync_work_t;
 
-#define SYNC_ATTR		0x0001	/* sync attributes */
-#define SYNC_DELWRI		0x0002	/* look at delayed writes */
 #define SYNC_WAIT		0x0004	/* wait for i/o to complete */
 #define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
 #define SYNC_IOWAIT		0x0010  /* wait for all I/O to complete */
@@ -39,7 +37,8 @@ typedef struct xfs_sync_work {
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
 
-int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_attr(struct xfs_mount *mp, int flags);
+int xfs_sync_data(struct xfs_mount *mp, int flags);
 int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
 
 int xfs_quiesce_data(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f304ef..edf8bdf4141f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
 	 * waiting for the lock because someone else is waiting on the lock we
 	 * hold and we cannot drop that as we are in a transaction here.
 	 *
-	 * Lucky for us, this inversion is rarely a problem because it's a
-	 * directory inode that we are trying to lock here and that means the
-	 * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
-	 * used. i.e. freeze, remount-ro, quotasync or unmount.
+	 * Lucky for us, this inversion is not a problem because it's a
+	 * directory inode that we are trying to lock here.
 	 *
 	 * So, if we can't get the iolock without sleeping then just give up
 	 */

From b0710ccc6d9fa8fb908b5f6d1b0782a09d80e24f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:37:11 +0200
Subject: [PATCH 18/23] xfs: remove SYNC_IOWAIT

We want to wait for all I/O to finish when we do data integrity syncs.  So
there is no reason to keep SYNC_WAIT separate from SYNC_IOWAIT.  This
causes a little change in behaviour for the ENOSPC flushing code which now
does a second submission and wait of buffered I/O, but that should finish
ASAP as we already did an asynchronous writeout earlier.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c | 8 ++++----
 fs/xfs/linux-2.6/xfs_sync.h | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index c1a9a1135073..32abd96b1095 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -227,7 +227,7 @@ xfs_sync_inode_data(
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
  out_wait:
-	if (flags & SYNC_IOWAIT)
+	if (flags & SYNC_WAIT)
 		xfs_ioend_wait(ip);
 	IRELE(ip);
 	return error;
@@ -278,7 +278,7 @@ xfs_sync_data(
 {
 	int			error;
 
-	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT|SYNC_IOWAIT)) == 0);
+	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
 				      XFS_ICI_NO_TAG);
@@ -422,7 +422,7 @@ xfs_quiesce_data(
 	xfs_filestream_flush(mp);
 
 	/* push and block */
-	xfs_sync_data(mp, SYNC_WAIT|SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_WAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
@@ -535,7 +535,7 @@ xfs_flush_inodes_work(
 {
 	struct inode	*inode = arg;
 	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_IOWAIT);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 	iput(inode);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 26bfb5c42e76..bda33a03e12b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -31,7 +31,6 @@ typedef struct xfs_sync_work {
 
 #define SYNC_WAIT		0x0004	/* wait for i/o to complete */
 #define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
-#define SYNC_IOWAIT		0x0010  /* wait for all I/O to complete */
 #define SYNC_TRYLOCK		0x0020  /* only try to lock inodes */
 
 int xfs_syncd_init(struct xfs_mount *mp);

From 8b5403a6d772d340541cfb30a668fde119c40ac1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 8 Jun 2009 15:37:16 +0200
Subject: [PATCH 19/23] xfs: remove SYNC_BDFLUSH

SYNC_BDFLUSH is a leftover from IRIX and rather misnamed for todays
code.  Make xfs_sync_fsdata and xfs_dq_sync use the SYNC_TRYLOCK flag
for not blocking on logs just as the inode sync code already does.

For xfs_sync_fsdata it's a trivial 1:1 replacement, but for xfs_qm_sync
I use the opportunity to decouple the non-blocking lock case from the
different flushing modes, similar to the inode sync code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/linux-2.6/xfs_sync.c |  8 ++++----
 fs/xfs/linux-2.6/xfs_sync.h |  5 ++---
 fs/xfs/quota/xfs_qm.c       | 28 +++++-----------------------
 3 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 32abd96b1095..b619d6b8ca43 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -353,7 +353,7 @@ xfs_sync_fsdata(
 	 * If this is xfssyncd() then only sync the superblock if we can
 	 * lock it without sleeping and it is not pinned.
 	 */
-	if (flags & SYNC_BDFLUSH) {
+	if (flags & SYNC_TRYLOCK) {
 		ASSERT(!(flags & SYNC_WAIT));
 
 		bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -418,7 +418,7 @@ xfs_quiesce_data(
 
 	/* push non-blocking */
 	xfs_sync_data(mp, 0);
-	xfs_qm_sync(mp, SYNC_BDFLUSH);
+	xfs_qm_sync(mp, SYNC_TRYLOCK);
 	xfs_filestream_flush(mp);
 
 	/* push and block */
@@ -568,8 +568,8 @@ xfs_sync_worker(
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
 		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 		/* dgc: errors ignored here */
-		error = xfs_qm_sync(mp, SYNC_BDFLUSH);
-		error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
 		if (xfs_log_need_covered(mp))
 			error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
 	}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index bda33a03e12b..2a10301c99c7 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,9 +29,8 @@ typedef struct xfs_sync_work {
 	struct completion	*w_completion;
 } xfs_sync_work_t;
 
-#define SYNC_WAIT		0x0004	/* wait for i/o to complete */
-#define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
-#define SYNC_TRYLOCK		0x0020  /* only try to lock inodes */
+#define SYNC_WAIT		0x0001	/* wait for i/o to complete */
+#define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index aa5d8212661c..22b7c8d364e8 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -905,11 +905,6 @@ xfs_qm_dqdetach(
 	}
 }
 
-/*
- * This is called to sync quotas. We can be told to use non-blocking
- * semantics by either the SYNC_BDFLUSH flag or the absence of the
- * SYNC_WAIT flag.
- */
 int
 xfs_qm_sync(
 	xfs_mount_t	*mp,
@@ -918,17 +913,13 @@ xfs_qm_sync(
 	int		recl, restarts;
 	xfs_dquot_t	*dqp;
 	uint		flush_flags;
-	boolean_t	nowait;
 	int		error;
 
 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
+	flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
 	restarts = 0;
-	/*
-	 * We won't block unless we are asked to.
-	 */
-	nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
 
   again:
 	xfs_qm_mplist_lock(mp);
@@ -948,18 +939,10 @@ xfs_qm_sync(
 		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
 		 * This is very similar to what xfs_sync does with inodes.
 		 */
-		if (flags & SYNC_BDFLUSH) {
-			if (! XFS_DQ_IS_DIRTY(dqp))
+		if (flags & SYNC_TRYLOCK) {
+			if (!XFS_DQ_IS_DIRTY(dqp))
 				continue;
-		}
-
-		if (nowait) {
-			/*
-			 * Try to acquire the dquot lock. We are NOT out of
-			 * lock order, but we just don't want to wait for this
-			 * lock, unless somebody wanted us to.
-			 */
-			if (! xfs_qm_dqlock_nowait(dqp))
+			if (!xfs_qm_dqlock_nowait(dqp))
 				continue;
 		} else {
 			xfs_dqlock(dqp);
@@ -976,7 +959,7 @@ xfs_qm_sync(
 		/* XXX a sentinel would be better */
 		recl = XFS_QI_MPLRECLAIMS(mp);
 		if (!xfs_dqflock_nowait(dqp)) {
-			if (nowait) {
+			if (flags & SYNC_TRYLOCK) {
 				xfs_dqunlock(dqp);
 				continue;
 			}
@@ -994,7 +977,6 @@ xfs_qm_sync(
 		 * Let go of the mplist lock. We don't want to hold it
 		 * across a disk write
 		 */
-		flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
 		xfs_qm_mplist_unlock(mp);
 		xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
 		error = xfs_qm_dqflush(dqp, flush_flags);

From ef14f0c1578dce4b688726eb2603e50b62d6665a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Jun 2009 17:07:47 +0200
Subject: [PATCH 20/23] xfs: use generic Posix ACL code

This patch rips out the XFS ACL handling code and uses the generic
fs/posix_acl.c code instead.  The ondisk format is of course left
unchanged.

This also introduces the same ACL caching all other Linux filesystems do
by adding pointers to the acl and default acl in struct xfs_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
---
 fs/xfs/Kconfig                 |   1 +
 fs/xfs/Makefile                |   2 +-
 fs/xfs/linux-2.6/xfs_acl.c     | 523 ++++++++++++++++++++
 fs/xfs/linux-2.6/xfs_ioctl.c   |   1 -
 fs/xfs/linux-2.6/xfs_iops.c    |  53 +-
 fs/xfs/linux-2.6/xfs_lrw.c     |   1 -
 fs/xfs/linux-2.6/xfs_super.c   |  14 -
 fs/xfs/linux-2.6/xfs_xattr.c   |  67 +--
 fs/xfs/quota/xfs_dquot.c       |   1 -
 fs/xfs/quota/xfs_dquot_item.c  |   1 -
 fs/xfs/quota/xfs_qm.c          |   1 -
 fs/xfs/quota/xfs_qm_bhv.c      |   1 -
 fs/xfs/quota/xfs_qm_stats.c    |   1 -
 fs/xfs/quota/xfs_qm_syscalls.c |   1 -
 fs/xfs/quota/xfs_trans_dquot.c |   1 -
 fs/xfs/xfs_acl.c               | 874 ---------------------------------
 fs/xfs/xfs_acl.h               |  97 ++--
 fs/xfs/xfs_arch.h              |  32 --
 fs/xfs/xfs_attr.c              |   1 -
 fs/xfs/xfs_iget.c              |   3 +
 fs/xfs/xfs_inode.c             |   1 -
 fs/xfs/xfs_inode.h             |   6 +
 fs/xfs/xfs_iomap.c             |   1 -
 fs/xfs/xfs_rw.c                |   1 -
 fs/xfs/xfs_vnodeops.c          |  15 +-
 fs/xfs/xfs_vnodeops.h          |   1 +
 26 files changed, 598 insertions(+), 1103 deletions(-)
 create mode 100644 fs/xfs/linux-2.6/xfs_acl.c
 delete mode 100644 fs/xfs/xfs_acl.c

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5899cd..480f28127f09 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
 config XFS_POSIX_ACL
 	bool "XFS POSIX ACL support"
 	depends on XFS_FS
+	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
 	  groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 222c59e541ce..7a59daed1782 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
 endif
 
 xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
+xfs-$(CONFIG_XFS_POSIX_ACL)	+= $(XFS_LINUX)/xfs_acl.o
 xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
 xfs-$(CONFIG_SYSCTL)		+= $(XFS_LINUX)/xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= $(XFS_LINUX)/xfs_ioctl32.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 000000000000..1e9d1246eebc
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+#define XFS_ACL_NOT_CACHED	((void *)-1)
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ *  - access and updates to the ip->i_acl and ip->i_default_acl pointers are
+ *    protected by inode->i_lock.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+	struct posix_acl_entry *acl_e;
+	struct posix_acl *acl;
+	struct xfs_acl_entry *ace;
+	int count, i;
+
+	count = be32_to_cpu(aclp->acl_cnt);
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		acl_e = &acl->a_entries[i];
+		ace = &aclp->acl_entry[i];
+
+		/*
+		 * The tag is 32 bits on disk and 16 bits in core.
+		 *
+		 * Because every access to it goes through the core
+		 * format first this is not a problem.
+		 */
+		acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+		acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+		switch (acl_e->e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			acl_e->e_id = be32_to_cpu(ace->ae_id);
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl_e->e_id = ACL_UNDEFINED_ID;
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+	const struct posix_acl_entry *acl_e;
+	struct xfs_acl_entry *ace;
+	int i;
+
+	aclp->acl_cnt = cpu_to_be32(acl->a_count);
+	for (i = 0; i < acl->a_count; i++) {
+		ace = &aclp->acl_entry[i];
+		acl_e = &acl->a_entries[i];
+
+		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+		ace->ae_id = cpu_to_be32(acl_e->e_id);
+		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+	}
+}
+
+/*
+ * Update the cached ACL pointer in the inode.
+ *
+ * Because we don't hold any locks while reading/writing the attribute
+ * from/to disk another thread could have raced and updated the cached
+ * ACL value before us. In that case we release the previous cached value
+ * and update it with our new value.
+ */
+STATIC void
+xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
+		struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(*p_acl);
+	*p_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl = NULL, **p_acl;
+	struct xfs_acl *xfs_acl;
+	int len = sizeof(struct xfs_acl);
+	char *ea_name;
+	int error;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock(&inode->i_lock);
+	if (*p_acl != XFS_ACL_NOT_CACHED)
+		acl = posix_acl_dup(*p_acl);
+	spin_unlock(&inode->i_lock);
+
+	/*
+	 * If we have a cached ACLs value just return it, not need to
+	 * go out to the disk.
+	 */
+	if (acl)
+		return acl;
+
+	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+	if (!xfs_acl)
+		return ERR_PTR(-ENOMEM);
+
+	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	if (error) {
+		/*
+		 * If the attribute doesn't exist make sure we have a negative
+		 * cache entry, for any other error assume it is transient and
+		 * leave the cache entry as XFS_ACL_NOT_CACHED.
+		 */
+		if (error == -ENOATTR) {
+			acl = NULL;
+			goto out_update_cache;
+		}
+		goto out;
+	}
+
+	acl = xfs_acl_from_disk(xfs_acl);
+	if (IS_ERR(acl))
+		goto out;
+
+ out_update_cache:
+	xfs_update_cached_acl(inode, p_acl, acl);
+ out:
+	kfree(xfs_acl);
+	return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl **p_acl;
+	char *ea_name;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		p_acl = &ip->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		ea_name = SGI_ACL_DEFAULT;
+		p_acl = &ip->i_default_acl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		struct xfs_acl *xfs_acl;
+		int len;
+
+		xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+		if (!xfs_acl)
+			return -ENOMEM;
+
+		xfs_acl_to_disk(xfs_acl, acl);
+		len = sizeof(struct xfs_acl) -
+			(sizeof(struct xfs_acl_entry) *
+			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+				len, ATTR_ROOT);
+
+		kfree(xfs_acl);
+	} else {
+		/*
+		 * A NULL ACL argument means we want to remove the ACL.
+		 */
+		error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+		/*
+		 * If the attribute didn't exist to start with that's fine.
+		 */
+		if (error == -ENOATTR)
+			error = 0;
+	}
+
+	if (!error)
+		xfs_update_cached_acl(inode, p_acl, acl);
+	return error;
+}
+
+int
+xfs_check_acl(struct inode *inode, int mask)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl;
+	int error = -EAGAIN;
+
+	xfs_itrace_entry(ip);
+
+	/*
+	 * If there is no attribute fork no ACL exists on this inode and
+	 * we can skip the whole exercise.
+	 */
+	if (!XFS_IFORK_Q(ip))
+		return -EAGAIN;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+	}
+
+	return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, mode_t mode)
+{
+	int error = 0;
+
+	if (mode != inode->i_mode) {
+		struct iattr iattr;
+
+		iattr.ia_valid = ATTR_MODE;
+		iattr.ia_mode = mode;
+
+		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+	}
+
+	return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, char *name)
+{
+	int len = sizeof(struct xfs_acl);
+
+	return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+			    ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+	return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+	if (!S_ISDIR(inode->i_mode))
+		return 0;
+	return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+{
+	struct posix_acl *clone;
+	mode_t mode;
+	int error = 0, inherit = 0;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+		if (error)
+			return error;
+	}
+
+	clone = posix_acl_clone(default_acl, GFP_KERNEL);
+	if (!clone)
+		return -ENOMEM;
+
+	mode = inode->i_mode;
+	error = posix_acl_create_masq(clone, &mode);
+	if (error < 0)
+		goto out_release_clone;
+
+	/*
+	 * If posix_acl_create_masq returns a positive value we need to
+	 * inherit a permission that can't be represented using the Unix
+	 * mode bits and we actually need to set an ACL.
+	 */
+	if (error > 0)
+		inherit = 1;
+
+	error = xfs_set_mode(inode, mode);
+	if (error)
+		goto out_release_clone;
+
+	if (inherit)
+		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ out_release_clone:
+	posix_acl_release(clone);
+	return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl, *clone;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	clone = posix_acl_clone(acl, GFP_KERNEL);
+	posix_acl_release(acl);
+	if (!clone)
+		return -ENOMEM;
+
+	error = posix_acl_chmod_masq(clone, inode->i_mode);
+	if (!error)
+		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+	posix_acl_release(clone);
+	return error;
+}
+
+void
+xfs_inode_init_acls(struct xfs_inode *ip)
+{
+	/*
+	 * No need for locking, inode is not live yet.
+	 */
+	ip->i_acl = XFS_ACL_NOT_CACHED;
+	ip->i_default_acl = XFS_ACL_NOT_CACHED;
+}
+
+void
+xfs_inode_clear_acls(struct xfs_inode *ip)
+{
+	/*
+	 * No need for locking here, the inode is not live anymore
+	 * and just about to be freed.
+	 */
+	if (ip->i_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(ip->i_acl);
+	if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
+		posix_acl_release(ip->i_default_acl);
+}
+
+
+/*
+ * System xattr handlers.
+ *
+ * Currently Posix ACLs are the only system namespace extended attribute
+ * handlers supported by XFS, so we just implement the handlers here.
+ * If we ever support other system extended attributes this will need
+ * some refactoring.
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+	if (strcmp(name, "posix_acl_access") == 0)
+		return ACL_TYPE_ACCESS;
+	else if (strcmp(name, "posix_acl_default") == 0)
+		return ACL_TYPE_DEFAULT;
+	return -EINVAL;
+}
+
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+		void *value, size_t size)
+{
+	struct posix_acl *acl;
+	int type, error;
+
+	type = xfs_decode_acl(name);
+	if (type < 0)
+		return type;
+
+	acl = xfs_get_acl(inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+
+	error = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+		const void *value, size_t size, int flags)
+{
+	struct posix_acl *acl = NULL;
+	int error = 0, type;
+
+	type = xfs_decode_acl(name);
+	if (type < 0)
+		return type;
+	if (flags & XATTR_CREATE)
+		return -EINVAL;
+	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+		return value ? -EACCES : 0;
+	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (!value)
+		goto set_acl;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (!acl) {
+		/*
+		 * acl_set_file(3) may request that we set default ACLs with
+		 * zero length -- defend (gracefully) against that here.
+		 */
+		goto out;
+	}
+	if (IS_ERR(acl)) {
+		error = PTR_ERR(acl);
+		goto out;
+	}
+
+	error = posix_acl_valid(acl);
+	if (error)
+		goto out_release;
+
+	error = -EINVAL;
+	if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+		goto out_release;
+
+	if (type == ACL_TYPE_ACCESS) {
+		mode_t mode = inode->i_mode;
+		error = posix_acl_equiv_mode(acl, &mode);
+
+		if (error <= 0) {
+			posix_acl_release(acl);
+			acl = NULL;
+
+			if (error < 0)
+				return error;
+		}
+
+		error = xfs_set_mode(inode, mode);
+		if (error)
+			goto out_release;
+	}
+
+ set_acl:
+	error = xfs_set_acl(inode, type, acl);
+ out_release:
+	posix_acl_release(acl);
+ out:
+	return error;
+}
+
+struct xattr_handler xfs_xattr_system_handler = {
+	.prefix	= XATTR_SYSTEM_PREFIX,
+	.get	= xfs_xattr_system_get,
+	.set	= xfs_xattr_system_set,
+};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index c7d684f02f89..182d4be9ffab 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
 #include "xfs_itable.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_bmap.h"
 #include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 631d0137551e..84e391ab70b0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_acl.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -51,6 +52,7 @@
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/namei.h>
+#include <linux/posix_acl.h>
 #include <linux/security.h>
 #include <linux/falloc.h>
 #include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
 {
 	struct inode	*inode;
 	struct xfs_inode *ip = NULL;
-	xfs_acl_t	*default_acl = NULL;
+	struct posix_acl *default_acl = NULL;
 	struct xfs_name	name;
-	int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
 	int		error;
 
 	/*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
 		rdev = 0;
 	}
 
-	if (test_default_acl && test_default_acl(dir)) {
-		if (!_ACL_ALLOC(default_acl)) {
-			return -ENOMEM;
-		}
-		if (!_ACL_GET_DEFAULT(dir, default_acl)) {
-			_ACL_FREE(default_acl);
-			default_acl = NULL;
-		}
-	}
+	if (IS_POSIXACL(dir)) {
+		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(default_acl))
+			return -PTR_ERR(default_acl);
 
-	if (IS_POSIXACL(dir) && !default_acl)
-		mode &= ~current->fs->umask;
+		if (!default_acl)
+			mode &= ~current->fs->umask;
+	}
 
 	xfs_dentry_to_name(&name, dentry);
 	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
 		goto out_cleanup_inode;
 
 	if (default_acl) {
-		error = _ACL_INHERIT(inode, mode, default_acl);
+		error = -xfs_inherit_acl(inode, default_acl);
 		if (unlikely(error))
 			goto out_cleanup_inode;
-		_ACL_FREE(default_acl);
+		posix_acl_release(default_acl);
 	}
 
 
@@ -257,8 +254,7 @@ xfs_vn_mknod(
  out_cleanup_inode:
 	xfs_cleanup_inode(dir, inode, dentry);
  out_free_acl:
-	if (default_acl)
-		_ACL_FREE(default_acl);
+	posix_acl_release(default_acl);
 	return -error;
 }
 
@@ -488,26 +484,6 @@ xfs_vn_put_link(
 		kfree(s);
 }
 
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-xfs_check_acl(
-	struct inode		*inode,
-	int			mask)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			error;
-
-	xfs_itrace_entry(ip);
-
-	if (XFS_IFORK_Q(ip)) {
-		error = xfs_acl_iaccess(ip, mask, NULL);
-		if (error != -1)
-			return -error;
-	}
-
-	return -EAGAIN;
-}
-
 STATIC int
 xfs_vn_permission(
 	struct inode		*inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
 {
 	return generic_permission(inode, mask, xfs_check_acl);
 }
-#else
-#define xfs_vn_permission NULL
-#endif
 
 STATIC int
 xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192ccbe6..7078974a6eee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_inode_item.h"
 #include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d4e7ef8f8df9..36fb8a657c55 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
 #include "xfs_itable.h"
 #include "xfs_fsops.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
@@ -1735,18 +1734,8 @@ xfs_init_zones(void)
 	if (!xfs_ili_zone)
 		goto out_destroy_inode_zone;
 
-#ifdef CONFIG_XFS_POSIX_ACL
-	xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
-	if (!xfs_acl_zone)
-		goto out_destroy_ili_zone;
-#endif
-
 	return 0;
 
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
-	kmem_zone_destroy(xfs_ili_zone);
  out_destroy_inode_zone:
 	kmem_zone_destroy(xfs_inode_zone);
  out_destroy_efi_zone:
@@ -1780,9 +1769,6 @@ xfs_init_zones(void)
 STATIC void
 xfs_destroy_zones(void)
 {
-#ifdef CONFIG_XFS_POSIX_ACL
-	kmem_zone_destroy(xfs_acl_zone);
-#endif
 	kmem_zone_destroy(xfs_ili_zone);
 	kmem_zone_destroy(xfs_inode_zone);
 	kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621fde6ed..497c7fb75cc1 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
 #include <linux/xattr.h>
 
 
-/*
- * ACL handling.  Should eventually be moved into xfs_acl.c
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return _ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return _ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
-/*
- * Get system extended attributes which at the moment only
- * includes Posix ACLs.
- */
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *buffer, size_t size)
-{
-	int acl;
-
-	acl = xfs_decode_acl(name);
-	if (acl < 0)
-		return acl;
-
-	return xfs_acl_vget(inode, buffer, size, acl);
-}
-
-static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	int acl;
-
-	acl = xfs_decode_acl(name);
-	if (acl < 0)
-		return acl;
-	if (flags & XATTR_CREATE)
-		return -EINVAL;
-
-	if (!value)
-		return xfs_acl_vremove(inode, acl);
-
-	return xfs_acl_vset(inode, (void *)value, size, acl);
-}
-
-static struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
-};
-
-
-/*
- * Real xattr handling.  The only difference between the namespaces is
- * a flag passed to the low-level attr code.
- */
-
 static int
 __xfs_xattr_get(struct inode *inode, const char *name,
 		void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_user_handler,
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
 	&xfs_xattr_system_handler,
+#endif
 	NULL
 };
 
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
 	/*
 	 * Then add the two synthetic ACL attributes.
 	 */
-	if (xfs_acl_vhasacl_access(inode)) {
+	if (posix_acl_access_exists(inode)) {
 		error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
 				strlen(POSIX_ACL_XATTR_ACCESS) + 1,
 				data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
 			return error;
 	}
 
-	if (xfs_acl_vhasacl_default(inode)) {
+	if (posix_acl_default_exists(inode)) {
 		error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
 				strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
 				data, size, &context.count);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 4d6d051ee56e..2f3f2229eaaf 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a7c4f5..d0d4a9a0bbd7 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 22b7c8d364e8..45b1bfef7388 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 56a5965f3c8b..a5346630dfae 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f545cf5..21b08c0396a1 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 7126f855e14b..4e4276b956e8 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index eafa7ab34085..97ac9640be98 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73999a4..000000000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/capability.h>
-#include <linux/posix_acl_xattr.h>
-
-STATIC int	xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
-STATIC void     xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_endian(xfs_acl_t *);
-STATIC int	xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int	xfs_acl_invalid(xfs_acl_t *);
-STATIC void	xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void	xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
-STATIC void	xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
-STATIC int	xfs_acl_allow_set(struct inode *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
-	struct inode	*vp)
-{
-	int		error;
-
-	xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
-	return (error == 0);
-}
-
-/*
- * Test for existence of default ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_default(
-	struct inode	*vp)
-{
-	int		error;
-
-	if (!S_ISDIR(vp->i_mode))
-		return 0;
-	xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
-	return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
-	posix_acl_xattr_header	*src,
-	size_t			size,
-	xfs_acl_t		*dest)
-{
-	posix_acl_xattr_entry	*src_entry;
-	xfs_acl_entry_t		*dest_entry;
-	int			n;
-
-	if (!src || !dest)
-		return EINVAL;
-
-	if (size < sizeof(posix_acl_xattr_header))
-		return EINVAL;
-
-	if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
-		return EOPNOTSUPP;
-
-	memset(dest, 0, sizeof(xfs_acl_t));
-	dest->acl_cnt = posix_acl_xattr_count(size);
-	if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
-		return EINVAL;
-
-	/*
-	 * acl_set_file(3) may request that we set default ACLs with
-	 * zero length -- defend (gracefully) against that here.
-	 */
-	if (!dest->acl_cnt)
-		return 0;
-
-	src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
-	dest_entry = &dest->acl_entry[0];
-
-	for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
-		dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
-		if (_ACL_PERM_INVALID(dest_entry->ae_perm))
-			return EINVAL;
-		dest_entry->ae_tag  = le16_to_cpu(src_entry->e_tag);
-		switch(dest_entry->ae_tag) {
-		case ACL_USER:
-		case ACL_GROUP:
-			dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
-			break;
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			dest_entry->ae_id = ACL_UNDEFINED_ID;
-			break;
-		default:
-			return EINVAL;
-		}
-	}
-	if (xfs_acl_invalid(dest))
-		return EINVAL;
-
-	return 0;
-}
-
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
-	const void	*va,
-	const void	*vb)
-{
-	xfs_acl_entry_t	*a = (xfs_acl_entry_t *)va,
-			*b = (xfs_acl_entry_t *)vb;
-
-	if (a->ae_tag == b->ae_tag)
-		return (a->ae_id - b->ae_id);
-	return (a->ae_tag - b->ae_tag);
-}
-
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
-	xfs_acl_t		*src,
-	posix_acl_xattr_header	*dest,
-	size_t			size)
-{
-	int			n;
-	size_t			new_size = posix_acl_xattr_size(src->acl_cnt);
-	posix_acl_xattr_entry	*dest_entry;
-	xfs_acl_entry_t		*src_entry;
-
-	if (size < new_size)
-		return -ERANGE;
-
-	/* Need to sort src XFS ACL by <ae_tag,ae_id> */
-	xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
-		 xfs_acl_entry_compare);
-
-	dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
-	dest_entry = &dest->a_entries[0];
-	src_entry = &src->acl_entry[0];
-	for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
-		dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
-		if (_ACL_PERM_INVALID(src_entry->ae_perm))
-			return -EINVAL;
-		dest_entry->e_tag  = cpu_to_le16(src_entry->ae_tag);
-		switch (src_entry->ae_tag) {
-		case ACL_USER:
-		case ACL_GROUP:
-			dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
-				break;
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-	return new_size;
-}
-
-int
-xfs_acl_vget(
-	struct inode	*vp,
-	void		*acl,
-	size_t		size,
-	int		kind)
-{
-	int			error;
-	xfs_acl_t		*xfs_acl = NULL;
-	posix_acl_xattr_header	*ext_acl = acl;
-	int			flags = 0;
-
-	if(size) {
-		if (!(_ACL_ALLOC(xfs_acl))) {
-			error = ENOMEM;
-			goto out;
-		}
-		memset(xfs_acl, 0, sizeof(xfs_acl_t));
-	} else
-		flags = ATTR_KERNOVAL;
-
-	xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
-	if (error)
-		goto out;
-
-	if (!size) {
-		error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
-	} else {
-		if (xfs_acl_invalid(xfs_acl)) {
-			error = EINVAL;
-			goto out;
-		}
-		if (kind == _ACL_TYPE_ACCESS)
-			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
-		error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
-	}
-out:
-	if(xfs_acl)
-		_ACL_FREE(xfs_acl);
-	return -error;
-}
-
-int
-xfs_acl_vremove(
-	struct inode	*vp,
-	int		kind)
-{
-	int		error;
-
-	error = xfs_acl_allow_set(vp, kind);
-	if (!error) {
-		error = xfs_attr_remove(XFS_I(vp),
-						kind == _ACL_TYPE_DEFAULT?
-						SGI_ACL_DEFAULT: SGI_ACL_FILE,
-						ATTR_ROOT);
-		if (error == ENOATTR)
-			error = 0;	/* 'scool */
-	}
-	return -error;
-}
-
-int
-xfs_acl_vset(
-	struct inode		*vp,
-	void			*acl,
-	size_t			size,
-	int			kind)
-{
-	posix_acl_xattr_header	*ext_acl = acl;
-	xfs_acl_t		*xfs_acl;
-	int			error;
-	int			basicperms = 0; /* more than std unix perms? */
-
-	if (!acl)
-		return -EINVAL;
-
-	if (!(_ACL_ALLOC(xfs_acl)))
-		return -ENOMEM;
-
-	error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
-	if (error) {
-		_ACL_FREE(xfs_acl);
-		return -error;
-	}
-	if (!xfs_acl->acl_cnt) {
-		_ACL_FREE(xfs_acl);
-		return 0;
-	}
-
-	error = xfs_acl_allow_set(vp, kind);
-
-	/* Incoming ACL exists, set file mode based on its value */
-	if (!error && kind == _ACL_TYPE_ACCESS)
-		error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
-	if (error)
-		goto out;
-
-	/*
-	 * If we have more than std unix permissions, set up the actual attr.
-	 * Otherwise, delete any existing attr.  This prevents us from
-	 * having actual attrs for permissions that can be stored in the
-	 * standard permission bits.
-	 */
-	if (!basicperms) {
-		xfs_acl_set_attr(vp, xfs_acl, kind, &error);
-	} else {
-		error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-	}
-
-out:
-	_ACL_FREE(xfs_acl);
-	return -error;
-}
-
-int
-xfs_acl_iaccess(
-	xfs_inode_t	*ip,
-	mode_t		mode,
-	cred_t		*cr)
-{
-	xfs_acl_t	*acl;
-	int		rval;
-	struct xfs_name	acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
-
-	if (!(_ACL_ALLOC(acl)))
-		return -1;
-
-	/* If the file has no ACL return -1. */
-	rval = sizeof(xfs_acl_t);
-	if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
-		_ACL_FREE(acl);
-		return -1;
-	}
-	xfs_acl_get_endian(acl);
-
-	/* If the file has an empty ACL return -1. */
-	if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
-		_ACL_FREE(acl);
-		return -1;
-	}
-
-	/* Synchronize ACL with mode bits */
-	xfs_acl_sync_mode(ip->i_d.di_mode, acl);
-
-	rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
-	_ACL_FREE(acl);
-	return rval;
-}
-
-STATIC int
-xfs_acl_allow_set(
-	struct inode	*vp,
-	int		kind)
-{
-	if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
-		return EPERM;
-	if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
-		return ENOTDIR;
-	if (vp->i_sb->s_flags & MS_RDONLY)
-		return EROFS;
-	if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
-		return EPERM;
-	return 0;
-}
-
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- *       It is not used to find out the credentials uid or groups etc, as was
- *       done in IRIX. It is assumed that the uid and groups for the current
- *       thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
-	uid_t		fuid,
-	gid_t		fgid,
-	xfs_acl_t	*fap,
-	mode_t		md,
-	cred_t		*cr)
-{
-	xfs_acl_entry_t	matched;
-	int		i, allows;
-	int		maskallows = -1;	/* true, but not 1, either */
-	int		seen_userobj = 0;
-
-	matched.ae_tag = 0;	/* Invalid type */
-	matched.ae_perm = 0;
-
-	for (i = 0; i < fap->acl_cnt; i++) {
-		/*
-		 * Break out if we've got a user_obj entry or
-		 * a user entry and the mask (and have processed USER_OBJ)
-		 */
-		if (matched.ae_tag == ACL_USER_OBJ)
-			break;
-		if (matched.ae_tag == ACL_USER) {
-			if (maskallows != -1 && seen_userobj)
-				break;
-			if (fap->acl_entry[i].ae_tag != ACL_MASK &&
-			    fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
-				continue;
-		}
-		/* True if this entry allows the requested access */
-		allows = ((fap->acl_entry[i].ae_perm & md) == md);
-
-		switch (fap->acl_entry[i].ae_tag) {
-		case ACL_USER_OBJ:
-			seen_userobj = 1;
-			if (fuid != current_fsuid())
-				continue;
-			matched.ae_tag = ACL_USER_OBJ;
-			matched.ae_perm = allows;
-			break;
-		case ACL_USER:
-			if (fap->acl_entry[i].ae_id != current_fsuid())
-				continue;
-			matched.ae_tag = ACL_USER;
-			matched.ae_perm = allows;
-			break;
-		case ACL_GROUP_OBJ:
-			if ((matched.ae_tag == ACL_GROUP_OBJ ||
-			    matched.ae_tag == ACL_GROUP) && !allows)
-				continue;
-			if (!in_group_p(fgid))
-				continue;
-			matched.ae_tag = ACL_GROUP_OBJ;
-			matched.ae_perm = allows;
-			break;
-		case ACL_GROUP:
-			if ((matched.ae_tag == ACL_GROUP_OBJ ||
-			    matched.ae_tag == ACL_GROUP) && !allows)
-				continue;
-			if (!in_group_p(fap->acl_entry[i].ae_id))
-				continue;
-			matched.ae_tag = ACL_GROUP;
-			matched.ae_perm = allows;
-			break;
-		case ACL_MASK:
-			maskallows = allows;
-			break;
-		case ACL_OTHER:
-			if (matched.ae_tag != 0)
-				continue;
-			matched.ae_tag = ACL_OTHER;
-			matched.ae_perm = allows;
-			break;
-		}
-	}
-	/*
-	 * First possibility is that no matched entry allows access.
-	 * The capability to override DAC may exist, so check for it.
-	 */
-	switch (matched.ae_tag) {
-	case ACL_OTHER:
-	case ACL_USER_OBJ:
-		if (matched.ae_perm)
-			return 0;
-		break;
-	case ACL_USER:
-	case ACL_GROUP_OBJ:
-	case ACL_GROUP:
-		if (maskallows && matched.ae_perm)
-			return 0;
-		break;
-	case 0:
-		break;
-	}
-
-	/* EACCES tells generic_permission to check for capability overrides */
-	return EACCES;
-}
-
-/*
- * ACL validity checker.
- *   This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
-	xfs_acl_t	*aclp)
-{
-	xfs_acl_entry_t	*entry, *e;
-	int		user = 0, group = 0, other = 0, mask = 0;
-	int		mask_required = 0;
-	int		i, j;
-
-	if (!aclp)
-		goto acl_invalid;
-
-	if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
-		goto acl_invalid;
-
-	for (i = 0; i < aclp->acl_cnt; i++) {
-		entry = &aclp->acl_entry[i];
-		switch (entry->ae_tag) {
-		case ACL_USER_OBJ:
-			if (user++)
-				goto acl_invalid;
-			break;
-		case ACL_GROUP_OBJ:
-			if (group++)
-				goto acl_invalid;
-			break;
-		case ACL_OTHER:
-			if (other++)
-				goto acl_invalid;
-			break;
-		case ACL_USER:
-		case ACL_GROUP:
-			for (j = i + 1; j < aclp->acl_cnt; j++) {
-				e = &aclp->acl_entry[j];
-				if (e->ae_id == entry->ae_id &&
-				    e->ae_tag == entry->ae_tag)
-					goto acl_invalid;
-			}
-			mask_required++;
-			break;
-		case ACL_MASK:
-			if (mask++)
-				goto acl_invalid;
-			break;
-		default:
-			goto acl_invalid;
-		}
-	}
-	if (!user || !group || !other || (mask_required && !mask))
-		goto acl_invalid;
-	else
-		return 0;
-acl_invalid:
-	return EINVAL;
-}
-
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
-	xfs_acl_t	*aclp)
-{
-	xfs_acl_entry_t	*ace, *end;
-
-	INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	end = &aclp->acl_entry[0]+aclp->acl_cnt;
-	for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
-		INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
-		INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
-		INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
-	}
-}
-
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
-	struct inode	*vp,
-	xfs_acl_t	*aclp,
-	int		kind,
-	int		flags,
-	int		*error)
-{
-	int		len = sizeof(xfs_acl_t);
-
-	ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
-	flags |= ATTR_ROOT;
-	*error = xfs_attr_get(XFS_I(vp),
-					kind == _ACL_TYPE_ACCESS ?
-					SGI_ACL_FILE : SGI_ACL_DEFAULT,
-					(char *)aclp, &len, flags);
-	if (*error || (flags & ATTR_KERNOVAL))
-		return;
-	xfs_acl_get_endian(aclp);
-}
-
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
-	struct inode	*vp,
-	xfs_acl_t	*aclp,
-	int		kind,
-	int		*error)
-{
-	xfs_acl_entry_t	*ace, *newace, *end;
-	xfs_acl_t	*newacl;
-	int		len;
-
-	if (!(_ACL_ALLOC(newacl))) {
-		*error = ENOMEM;
-		return;
-	}
-
-	len = sizeof(xfs_acl_t) -
-	      (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
-	end = &aclp->acl_entry[0]+aclp->acl_cnt;
-	for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
-	     ace < end;
-	     ace++, newace++) {
-		INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
-		INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
-		INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
-	}
-	INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
-	*error = xfs_attr_set(XFS_I(vp),
-				kind == _ACL_TYPE_ACCESS ?
-				SGI_ACL_FILE: SGI_ACL_DEFAULT,
-				(char *)newacl, len, ATTR_ROOT);
-	_ACL_FREE(newacl);
-}
-
-int
-xfs_acl_vtoacl(
-	struct inode	*vp,
-	xfs_acl_t	*access_acl,
-	xfs_acl_t	*default_acl)
-{
-	int		error = 0;
-
-	if (access_acl) {
-		/*
-		 * Get the Access ACL and the mode.  If either cannot
-		 * be obtained for some reason, invalidate the access ACL.
-		 */
-		xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
-		if (error)
-			access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
-		else /* We have a good ACL and the file mode, synchronize. */
-			xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
-	}
-
-	if (default_acl) {
-		xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
-		if (error)
-			default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
-	}
-	return error;
-}
-
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
-int
-xfs_acl_inherit(
-	struct inode	*vp,
-	mode_t		mode,
-	xfs_acl_t	*pdaclp)
-{
-	xfs_acl_t	*cacl;
-	int		error = 0;
-	int		basicperms = 0;
-
-	/*
-	 * If the parent does not have a default ACL, or it's an
-	 * invalid ACL, we're done.
-	 */
-	if (!vp)
-		return 0;
-	if (!pdaclp || xfs_acl_invalid(pdaclp))
-		return 0;
-
-	/*
-	 * Copy the default ACL of the containing directory to
-	 * the access ACL of the new file and use the mode that
-	 * was passed in to set up the correct initial values for
-	 * the u::,g::[m::], and o:: entries.  This is what makes
-	 * umask() "work" with ACL's.
-	 */
-
-	if (!(_ACL_ALLOC(cacl)))
-		return ENOMEM;
-
-	memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
-	xfs_acl_filter_mode(mode, cacl);
-	error = xfs_acl_setmode(vp, cacl, &basicperms);
-	if (error)
-		goto out_error;
-
-	/*
-	 * Set the Default and Access ACL on the file.  The mode is already
-	 * set on the file, so we don't need to worry about that.
-	 *
-	 * If the new file is a directory, its default ACL is a copy of
-	 * the containing directory's default ACL.
-	 */
-	if (S_ISDIR(vp->i_mode))
-		xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
-	if (!error && !basicperms)
-		xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
-out_error:
-	_ACL_FREE(cacl);
-	return error;
-}
-
-/*
- * Set up the correct mode on the file based on the supplied ACL.  This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL.  Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
-	struct inode	*vp,
-	xfs_acl_t	*acl,
-	int		*basicperms)
-{
-	struct iattr	iattr;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-	int		i, nomask = 1;
-
-	*basicperms = 1;
-
-	if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
-		return 0;
-
-	/*
-	 * Copy the u::, g::, o::, and m:: bits from the ACL into the
-	 * mode.  The m:: bits take precedence over the g:: bits.
-	 */
-	iattr.ia_valid = ATTR_MODE;
-	iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
-	iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
-	ap = acl->acl_entry;
-	for (i = 0; i < acl->acl_cnt; ++i) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			iattr.ia_mode |= ap->ae_perm << 6;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:	/* more than just standard modes */
-			nomask = 0;
-			iattr.ia_mode |= ap->ae_perm << 3;
-			*basicperms = 0;
-			break;
-		case ACL_OTHER:
-			iattr.ia_mode |= ap->ae_perm;
-			break;
-		default:	/* more than just standard modes */
-			*basicperms = 0;
-			break;
-		}
-		ap++;
-	}
-
-	/* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		iattr.ia_mode |= gap->ae_perm << 3;
-
-	return xfs_setattr(XFS_I(vp), &iattr, 0);
-}
-
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change.  This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks.  Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- *         the existing mode bits will override whatever is in the
- *         ACL. Similarly, if there is a pre-existing ACL that was
- *         never in sync with its mode (owing to a bug in 6.5 and
- *         before), it will now magically (or mystically) be
- *         synchronized.  This could cause slight astonishment, but
- *         it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries.  These are treated as place holders when we fill
- * out the ACL.  This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
-	mode_t		mode,
-	xfs_acl_t	*acl)
-{
-	int		i, nomask = 1;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-
-	/*
-	 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
-	 * be set instead of the GROUP entry, if there is a MASK.
-	 */
-	for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			ap->ae_perm = (mode >> 6) & 0x7;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:
-			nomask = 0;
-			ap->ae_perm = (mode >> 3) & 0x7;
-			break;
-		case ACL_OTHER:
-			ap->ae_perm = mode & 0x7;
-			break;
-		default:
-			break;
-		}
-	}
-	/* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
-	mode_t		mode,
-	xfs_acl_t	*acl)
-{
-	int		i, nomask = 1;
-	xfs_acl_entry_t	*ap;
-	xfs_acl_entry_t	*gap = NULL;
-
-	/*
-	 * Set ACL entries. POSIX1003.1eD16 requires that the MASK
-	 * be merged with GROUP entry, if there is a MASK.
-	 */
-	for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
-		switch (ap->ae_tag) {
-		case ACL_USER_OBJ:
-			ap->ae_perm &= (mode >> 6) & 0x7;
-			break;
-		case ACL_GROUP_OBJ:
-			gap = ap;
-			break;
-		case ACL_MASK:
-			nomask = 0;
-			ap->ae_perm &= (mode >> 3) & 0x7;
-			break;
-		case ACL_OTHER:
-			ap->ae_perm &= mode & 0x7;
-			break;
-		default:
-			break;
-		}
-	}
-	/* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
-	if (gap && nomask)
-		gap->ae_perm &= (mode >> 3) & 0x7;
-}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db4def4..63dc1f2efad5 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
 #ifndef __XFS_ACL_H__
 #define __XFS_ACL_H__
 
-/*
- * Access Control Lists
- */
-typedef __uint16_t	xfs_acl_perm_t;
-typedef __int32_t	xfs_acl_tag_t;
-typedef __int32_t	xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
 
 #define XFS_ACL_MAX_ENTRIES 25
 #define XFS_ACL_NOT_PRESENT (-1)
 
-typedef struct xfs_acl_entry {
-	xfs_acl_tag_t	ae_tag;
-	xfs_acl_id_t	ae_id;
-	xfs_acl_perm_t	ae_perm;
-} xfs_acl_entry_t;
-
-typedef struct xfs_acl {
-	__int32_t	acl_cnt;
-	xfs_acl_entry_t	acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl {
+	__be32		acl_cnt;
+	struct xfs_acl_entry {
+		__be32	ae_tag;
+		__be32	ae_id;
+		__be16	ae_perm;
+	} acl_entry[XFS_ACL_MAX_ENTRIES];
+};
 
 /* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE	"SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT	"SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE		"SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT		"SGI_ACL_DEFAULT"
 #define SGI_ACL_FILE_SIZE	(sizeof(SGI_ACL_FILE)-1)
 #define SGI_ACL_DEFAULT_SIZE	(sizeof(SGI_ACL_DEFAULT)-1)
 
-#define _ACL_TYPE_ACCESS	1
-#define _ACL_TYPE_DEFAULT	2
-
 #ifdef CONFIG_XFS_POSIX_ACL
+extern int xfs_check_acl(struct inode *inode, int mask);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
+extern int xfs_acl_chmod(struct inode *inode);
+extern void xfs_inode_init_acls(struct xfs_inode *ip);
+extern void xfs_inode_clear_acls(struct xfs_inode *ip);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
 
-struct vattr;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name)	\
-		(zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
-#define xfs_acl_zone_destroy(zone)	kmem_zone_destroy(zone)
-
-extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct inode *);
-extern int xfs_acl_vhasacl_default(struct inode *);
-extern int xfs_acl_vset(struct inode *, void *, size_t, int);
-extern int xfs_acl_vget(struct inode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct inode *, int);
-
-#define _ACL_PERM_INVALID(perm)	((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,m,d)	(xfs_acl_inherit(c,m,d))
-#define _ACL_GET_ACCESS(pv,pa)	(xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd)	(xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS	xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS	xfs_acl_vhasacl_default
-
-#define _ACL_ALLOC(a)		((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a)		((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct xattr_handler xfs_xattr_system_handler;
 #else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t)	(-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t)	(-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t)	(-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v)	(0)
-#define xfs_acl_vhasacl_default(v)	(0)
-#define _ACL_ALLOC(a)		(1)	/* successfully allocate nothing */
-#define _ACL_FREE(a)		((void)0)
-#define _ACL_INHERIT(c,m,d)	(0)
-#define _ACL_GET_ACCESS(pv,pa)	(0)
-#define _ACL_GET_DEFAULT(pv,pd)	(0)
-#define _ACL_ACCESS_EXISTS	(NULL)
-#define _ACL_DEFAULT_EXISTS	(NULL)
-#endif
-
+# define xfs_check_acl					NULL
+# define xfs_get_acl(inode, type)			NULL
+# define xfs_inherit_acl(inode, default_acl)		0
+# define xfs_acl_chmod(inode)				0
+# define xfs_inode_init_acls(ip)
+# define xfs_inode_clear_acls(ip)
+# define posix_acl_access_exists(inode)			0
+# define posix_acl_default_exists(inode)		0
+#endif /* CONFIG_XFS_POSIX_ACL */
 #endif	/* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70d1360..0902249354a0 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
 
 #endif	/* __KERNEL__ */
 
-/* do we need conversion? */
-#define ARCH_NOCONVERT 1
-#ifdef XFS_NATIVE_HOST
-# define ARCH_CONVERT	ARCH_NOCONVERT
-#else
-# define ARCH_CONVERT	0
-#endif
-
-/* generic swapping macros */
-
-#ifndef HAVE_SWABMACROS
-#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
-#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
-#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
-#endif
-
-#define INT_SWAP(type, var) \
-    ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
-    ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
-    ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
-    (var))))
-
 /*
  * get and set integers from potentially unaligned locations
  */
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
 	((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
     }
 
-/* does not return a value */
-#define INT_SET(reference,arch,valueref) \
-    (__builtin_constant_p(valueref) ? \
-	(void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
-	(void)( \
-	    ((reference) = (valueref)), \
-	    ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
-	) \
-    )
-
 /*
  * In directories inode numbers are stored as unaligned arrays of unsigned
  * 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index cd1008b1c4cd..db15feb906ff 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
-#include "xfs_acl.h"
 #include "xfs_rw.h"
 #include "xfs_vnodeops.h"
 
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 73e1c0d767ac..76c540f719e4 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
+#include "xfs_acl.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
 	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
 	ip->i_size = 0;
 	ip->i_new_size = 0;
+	xfs_inode_init_acls(ip);
 
 	/*
 	 * Initialize inode's trace buffers.
@@ -558,6 +560,7 @@ xfs_ireclaim(
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
+	xfs_inode_clear_acls(ip);
 	kmem_zone_free(xfs_inode_zone, ip);
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c8cbf2..1f22d65fed0a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
 #include "xfs_utils.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_quota.h"
-#include "xfs_acl.h"
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1bc4b96..77016702938b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
 #ifndef	__XFS_INODE_H__
 #define	__XFS_INODE_H__
 
+struct posix_acl;
 struct xfs_dinode;
 struct xfs_inode;
 
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
 	/* VFS inode */
 	struct inode		i_vnode;	/* embedded VFS inode */
 
+#ifdef CONFIG_XFS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
 	/* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index feb30a92549b..67ae5555a30a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_rw.h"
-#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21c54d2..fea68615ed23 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
 #include "xfs_ialloc.h"
 #include "xfs_attr.h"
 #include "xfs_bmap.h"
-#include "xfs_acl.h"
 #include "xfs_error.h"
 #include "xfs_buf_item.h"
 #include "xfs_rw.h"
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b56321b2b9f0..c4eca5ed5dab 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
+#include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_rw.h"
 #include "xfs_error.h"
@@ -467,8 +468,20 @@ xfs_setattr(
 	xfs_qm_dqrele(udqp);
 	xfs_qm_dqrele(gdqp);
 
-	if (code) {
+	if (code)
 		return code;
+
+	/*
+	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+	 * 	     update.  We could avoid this with linked transactions
+	 * 	     and passing down the transaction pointer all the way
+	 *	     to attr_set.  No previous user of the generic
+	 * 	     Posix ACL code seems to care about this issue either.
+	 */
+	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+		code = -xfs_acl_chmod(inode);
+		if (code)
+			return XFS_ERROR(code);
 	}
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6c61ff..a9e102de71a1 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
 
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);

From 7747a0b0af5976ba3828796b4f7a7adc3bb76dbd Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Thu, 11 Jun 2009 17:07:28 -0500
Subject: [PATCH 21/23] xfs: fix freeing memory in xfs_getbmap()

Regression from commit 28e211700a81b0a934b6c7a4b8e7dda843634d2f.
Need to free temporary buffer allocated in xfs_getbmap().

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Hedi Berriche <hedi@sgi.com>
Reported-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Reviewed-by: Eric Sandeen <sandeen@sandeen.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_bmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4b0f6efb046c..7928b9983c1d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6086,6 +6086,7 @@ xfs_getbmap(
 			break;
 	}
 
+	kmem_free(out);
 	return error;
 }
 

From 493b87e5ed352cf548e6456ddfc36576e28278ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Jun 2009 11:34:55 -0400
Subject: [PATCH 22/23] xfs: fix warnings with CONFIG_XFS_QUOTA disabled

Fix warnings about unitialized dquot variables by making sure
xfs_qm_vop_dqalloc touches it even when quotas are disabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/xfs_quota.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 079d2e61c626..3ec91ac74c2a 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -335,6 +335,14 @@ extern void xfs_qm_unmount(struct xfs_mount *);
 extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 
 #else
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+	*udqp = NULL;
+	*gdqp = NULL;
+	return 0;
+}
 #define xfs_trans_dup_dqinfo(tp, tp2)
 #define xfs_trans_free_dqinfo(tp)
 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
@@ -342,7 +350,6 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);
 #define xfs_trans_unreserve_and_mod_dquots(tp)
 #define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)	(0)
 #define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)	(0)
-#define xfs_qm_vop_dqalloc(ip, uid, gid, prid, fl, ou, og)		(0)
 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
 #define xfs_qm_vop_rename_dqattach(it)					(0)
 #define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)

From e83f1eb6bfc4004c19a99ee5f5aa65bd3fbecec3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Jun 2009 11:19:11 -0400
Subject: [PATCH 23/23] xfs: fix small mismerge in xfs_vn_mknod

Identation got messed up when merging the current_umask changes with
the generic ACL support.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: Felix Blyakher <felixb@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 800dd4f98d4e..58973bb46038 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -225,8 +225,8 @@ xfs_vn_mknod(
 		if (IS_ERR(default_acl))
 			return -PTR_ERR(default_acl);
 
-	if (!default_acl)
-		mode &= ~current_umask();
+		if (!default_acl)
+			mode &= ~current_umask();
 	}
 
 	xfs_dentry_to_name(&name, dentry);