From 79272b3562bb44ce7dc720cd13136f5a4a53c618 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 20 Jun 2014 09:36:41 -0400 Subject: [PATCH 1/8] GFS2: Only wait for demote when last holder is dequeued Function gfs2_glock_dq_wait is supposed to dequeue a glock and then wait for the lock to be demoted. The problem is, if this is a shared lock, its demote will depend on the other holders, which means you might end up waiting forever because the other process is blocked. This problem is especially apparent when dealing with nested flocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c355f7320e44..278fae5b6982 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,7 +1128,9 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + if (!find_first_holder(gl)) + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, + TASK_UNINTERRUPTIBLE); } /** From 94a09a3999ee978e097b5aad74034ed43bae56db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:43:32 +0100 Subject: [PATCH 2/8] GFS2: Fix race in glock lru glock disposal We must not leave items on the LRU list with GLF_LOCK set, since they can be removed if the glock is brought back into use, which may then potentially result in a hang, waiting for GLF_LOCK to clear. It doesn't happen very often, since it requires a glock that has not been used for a long time to be brought back into use at the same moment that the shrinker is part way through disposing of glocks. The fix is to set GLF_LOCK at a later time, when we already know that the other locks can be obtained. Also, we now only release the lru_lock in case a resched is needed, rather than on every iteration. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 278fae5b6982..c1e5b126d2ca 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1406,12 +1406,16 @@ __acquires(&lru_lock) gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } clear_bit(GLF_LRU, &gl->gl_flags); - spin_unlock(&lru_lock); gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1419,7 +1423,7 @@ __acquires(&lru_lock) if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; spin_unlock(&gl->gl_spin); - spin_lock(&lru_lock); + cond_resched_lock(&lru_lock); } } @@ -1444,7 +1448,7 @@ static long gfs2_scan_glock_lru(int nr) gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ - if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; From fe0bbd2986996b9efe3a78bf5a591b0496c7afea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:50:20 +0100 Subject: [PATCH 3/8] GFS2: Use GFP_NOFS when allocating glocks Normally GFP_KERNEL is ok here, but there is now a rarely used code path relating to deallocation of unlinked inodes (in certain corner cases) which if hit at times of memory shortage can cause recursion while trying to free memory. One solution would be to try and move the gfs2_glock_get() call so that it is no longer called while another glock is held, but that doesn't look at all easy, so GFP_NOFS is the best solution for the time being. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c1e5b126d2ca..b703dcc91588 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, cachep = gfs2_glock_aspace_cachep; else cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_KERNEL); + gl = kmem_cache_alloc(cachep, GFP_NOFS); if (!gl) return -ENOMEM; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); if (glops->go_flags & GLOF_LVB) { - gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { kmem_cache_free(cachep, gl); return -ENOMEM; From 6ec43b1838bd71633ac3f853c63ddf1f5940b1ed Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 25 Jun 2014 20:40:45 +0200 Subject: [PATCH 4/8] GFS2: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse --- fs/gfs2/lock_dlm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 91f274de1246..4fafea1c9ecf 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1036,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, new_size = old_size + RECOVER_SIZE_INC; - submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); - result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); + result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); if (!submit || !result) { kfree(submit); kfree(result); From 5bef3e7cf18c56cc733777c61b6b61a0b8a62b35 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:46:25 -0400 Subject: [PATCH 5/8] GFS2: Allow flocks to use normal glock dq rather than dq_wait This patch allows flock glocks to use a non-blocking dequeue rather than dq_wait. It also reverts the previous patch I had posted regarding dq_wait. The reverted patch isn't necessarily a bad idea, but I decided this might avoid unforeseen side effects, and was therefore safer. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 +- fs/gfs2/glock.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4fc3a3046174..491e8e023598 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b703dcc91588..ee4e04fe60fc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,9 +1128,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - if (!find_first_holder(gl)) - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** From 97a4f1d7653684fff0d50e9328917506f06e9d79 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:47:48 -0400 Subject: [PATCH 6/8] GFS2: Allow caching of glocks for flock This patch removes the GLF_NOCACHE flag from the glocks associated with flocks. There should be no good reason not to cache glocks for flocks: they only force the glock to be demoted before they can be reacquired, which can slow down performance and even cause glock hangs, especially in cases where the flocks are held in Shared (SH) mode. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 491e8e023598..26b3f952e6b1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); From 6b49d1d9c3c1088758c6a2758aaa5d236ef609e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 29 Jun 2014 12:21:39 +0200 Subject: [PATCH 7/8] GFS2: memcontrol: Spelling s/invlidate/invalidate/ Signed-off-by: Geert Uytterhoeven Cc: cluster-devel@redhat.com Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fc1100781bbc..2ffc67dce87f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl) * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: - * - * Normally we invlidate everything, but if we are moving into + * + * Normally we invalidate everything, but if we are moving into * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we * can keep hold of the metadata, since it won't have changed. * From 27ff6a0f7f5bf500e9d2a8760c062789b52c551f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 2 Jul 2014 22:05:27 +0200 Subject: [PATCH 8/8] GFS2: fs/gfs2/rgrp.c: kernel-doc warning fixes Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index db629d1bd1bd..f4cb9c0d6bbd 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le /** * gfs2_free_extlen - Return extent length of free blocks - * @rbm: Starting position + * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks @@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) /** * gfs2_rlist_free - free a resource group list - * @list: the list of resource groups + * @rlist: the list of resource groups * */