From a8bf2bc212e129dd59a8b06cdbc15079cc3bd876 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 24 Jun 2010 19:15:09 -0400 Subject: [PATCH 1/5] GFS2: O_TRUNC not working on stuffed files across cluster This patch replaces a statement that got dropped out by accident. Without the patch, truncates on stuffed (very small) files cause those files to have an unpredictable size. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4a48c0f4b402..84da64b551b2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { u64 dsize = size + sizeof(struct gfs2_inode); + ip->i_disksize = size; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); From b7dc2df5725fe7355fd76000ead7e39728e1b8a9 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 23 Jun 2010 11:44:47 -0400 Subject: [PATCH 2/5] GFS2: recovery stuck on transaction lock This patch fixes bugzilla bug #590878: GFS2: recovery stuck on transaction lock. We set the frozen flag on the glock when we receive a completion that cannot be delivered due to blocked locks. At that point we check to see whether the first waiting holder has the noexp flag set. If the noexp lock is queued later, then we need to unfreeze the glock at that point in time, namely, in the glock work function. This patch was originally written by Steve Whitehouse, but since he's on holiday, I'm submitting it. It's been well tested with a complex recovery test called revolver. Signed-off-by: Steve Whitehouse Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ddcdbf493536..dbab3fdc2582 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + struct gfs2_holder *gh; int drop_ref = 0; + if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) { + spin_lock(&gl->gl_spin); + gh = find_first_waiter(gl); + if (gh && (gh->gh_flags & LM_FLAG_NOEXP) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + } + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); drop_ref = 1; From b1becbdee776b447f203aa8da9a40488d5a75e1d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 17 Jun 2010 16:45:37 -0400 Subject: [PATCH 3/5] GFS2: Fix kernel NULL pointer dereference by dlm_astd This patch fixes a problem in an error path when looking up dinodes. There are two sister-functions, gfs2_inode_lookup and gfs2_process_unlinked_inode. Both functions acquire and hold the i_iopen glock for the dinode being looked up. The last thing they try to do is hold the i_gl glock for the dinode. If that glock fails for some reason, the error path was incorrectly calling gfs2_glock_put for the i_iopen glock twice. This resulted in the glock being prematurely freed. The "minimum hold time" usually kept the glock in memory, but the lock interface to dlm (aka lock_dlm) freed its memory for the glock. In some circumstances, it would cause dlm's dlm_astd daemon to try to call the bast function for the freed lock_dlm memory, which resulted in a NULL pointer dereference. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b5612cbb62a5..f03afd9c44bc 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, { struct inode *inode; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; inode = gfs2_iget(sb, no_addr); @@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) goto gfs2_nfsbypass; @@ -228,7 +229,8 @@ gfs2_nfsbypass: fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: if (inode->i_state & I_NEW) ip->i_gl->gl_object = NULL; @@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) { struct gfs2_sbd *sdp; struct gfs2_inode *ip; - struct gfs2_glock *io_gl; + struct gfs2_glock *io_gl = NULL; int error; struct gfs2_holder gh; struct inode *inode; @@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) ip->i_iopen_gh.gh_gl->gl_object = ip; gfs2_glock_put(io_gl); + io_gl = NULL; inode->i_mode = DT2IF(DT_UNKNOWN); @@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) fail_glock: gfs2_glock_dq(&ip->i_iopen_gh); fail_iopen: - gfs2_glock_put(io_gl); + if (io_gl) + gfs2_glock_put(io_gl); fail_put: ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); From 8b4216018bdbfbb1b76150d202b15ee68c38e991 Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Sun, 4 Jul 2010 01:33:24 -0400 Subject: [PATCH 4/5] GFS2: BUG in gfs2_adjust_quota HighMem pages on i686 do not get mapped to the buffer_heads and this was causing a NULL pointer dereference when we were trying to memset page buffers to zero. We now use zero_user() that kmaps the page and directly manipulates page data. This patch also fixes a boundary condition that was incorrect. Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 49667d68769e..b256d6f24288 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -694,10 +694,8 @@ get_a_page: if (!buffer_mapped(bh)) goto unlock_out; /* If it's a newly allocated disk block for quota, zero it */ - if (buffer_new(bh)) { - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - } + if (buffer_new(bh)) + zero_user(page, pos - blocksize, bh->b_size); } if (PageUptodate(page)) @@ -723,7 +721,7 @@ get_a_page: /* If quota straddles page boundary, we need to update the rest of the * quota at the beginning of the next page */ - if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ + if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) { ptr = ptr + nbytes; nbytes = sizeof(struct gfs2_quota) - nbytes; offset = 0; From 728a756b8fcd22d80e2dbba8117a8a3aafd3f203 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 14 Jul 2010 18:12:26 -0400 Subject: [PATCH 5/5] GFS2: rename causes kernel Oops This patch fixes a kernel Oops in the GFS2 rename code. The problem was in the way the gfs2 directory code was trying to re-use sentinel directory entries. In the failing case, gfs2's rename function was renaming a file to another name that had the same non-trivial length. The file being renamed happened to be the first directory entry on the leaf block. First, the rename code (gfs2_rename in ops_inode.c) found the original directory entry and decided it could do its job by simply replacing the directory entry with another. Therefore it determined correctly that no block allocations were needed. Next, the rename code deleted the old directory entry prior to replacing it with the new name. Therefore, the soon-to-be replaced directory entry was temporarily made into a directory entry "sentinel" or a place holder at the start of a leaf block. Lastly, it went to re-add the replacement directory entry in that leaf block. However, when gfs2_dirent_find_space was looking for space in the leaf block, it used the wrong value for the sentinel. That threw off its calculations so later it decides it can't really re-use the sentinel and therefore must allocate a new leaf block. But because it previously decided to re-use the directory entry, it didn't waste the time to grab a new block allocation for the inode. Therefore, the inode's i_alloc pointer was still NULL and it crashes trying to reference it. In the case of sentinel directory entries, the entire dirent is reused, not just the "free space" portion of it, and therefore the function gfs2_dirent_find_space should use the value 0 rather than GFS2_DIRENT_SIZE(0) for the actual dirent size. Fixing this calculation enables the reproducer programs to work properly. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8295c5b5d4a9..26ca3361a8bc 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, unsigned totlen = be16_to_cpu(dent->de_rec_len); if (gfs2_dirent_sentinel(dent)) - actual = GFS2_DIRENT_SIZE(0); + actual = 0; if (totlen - actual >= required) return 1; return 0;