From 0c775d5208284700de423e6746259da54a42e1f5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 25 Nov 2013 11:12:43 +1100 Subject: [PATCH 1/3] md/raid5: fix new memory-reference bug in alloc_thread_groups. In alloc_thread_groups, worker_groups is a pointer to an array, not an array of pointers. So worker_groups[i] is wrong. It should be &(*worker_groups)[i] Found-by: coverity Fixes: 60aaf9338545 Reported-by: Ben Hutchings Cc: majianpeng Signed-off-by: NeilBrown --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 47da0af6322b..676d8b7c5117 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5471,7 +5471,7 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt, for (i = 0; i < *group_cnt; i++) { struct r5worker_group *group; - group = worker_groups[i]; + group = &(*worker_groups)[i]; INIT_LIST_HEAD(&group->handle_list); group->conf = conf; group->workers = workers + i * cnt; From 142d44c310819e1965ca70b4d55d7679f5797e25 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Nov 2013 10:34:18 +1100 Subject: [PATCH 2/3] md: test mddev->flags more safely in md_check_recovery. commit 7a0a5355cbc71efa md: Don't test all of mddev->flags at once. made most tests on mddev->flags safer, but missed one. When commit 260fa034ef7a4ff8b7306 md: avoid deadlock when dirty buffers during md_stop. added MD_STILL_CLOSED, this caused md_check_recovery to misbehave. It can think there is something to do but find nothing. This can lead to the md thread spinning during array shutdown. https://bugzilla.kernel.org/show_bug.cgi?id=65721 Reported-and-tested-by: Richard W.M. Jones Fixes: 260fa034ef7a4ff8b7306 Cc: stable@vger.kernel.org (3.12) Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index b6b7a2866c9e..e60cebf3f519 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7777,7 +7777,7 @@ void md_check_recovery(struct mddev *mddev) if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( - (mddev->flags & ~ (1<flags & MD_UPDATE_SB_FLAGS & ~ (1<recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || From 6d183de4077191d1201283a9035ce57a9b05254d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 28 Nov 2013 10:55:27 +1100 Subject: [PATCH 3/3] md/raid5: fix newly-broken locking in get_active_stripe. commit 566c09c53455d7c4f1 raid5: relieve lock contention in get_active_stripe() modified the locking in get_active_stripe() reducing the range protected by the (highly contended) device_lock. Unfortunately it reduced the range too much opening up some races. One race can occur if get_priority_stripe runs between the test on sh->count and device_lock being taken. This will mean that sh->lru is not empty while get_active_stripe thinks ->count is zero resulting in a 'BUG' firing. Another race happens if __release_stripe is called immediately after sh->count is tested and found to be non-zero. If STRIPE_HANDLE is not set, get_active_stripe should increment ->active_stripes when it increments ->count from 0, but as it didn't think it was 0, it doesn't. Extending device_lock to cover the test on sh->count close these races. While we are here, fix the two BUG tests: -If count is zero, then lru really must not be empty, or we've lock the stripe_head somehow - no other tests are relevant. -STRIPE_ON_RELEASE_LIST is completely independent of ->lru so testing it is pointless. Reported-and-tested-by: Brassow Jonathan Reviewed-by: Shaohua Li Fixes: 566c09c53455d7c4f1 Signed-off-by: NeilBrown --- drivers/md/raid5.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 676d8b7c5117..cc055da02e2a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -678,26 +678,23 @@ get_active_stripe(struct r5conf *conf, sector_t sector, } else init_stripe(sh, sector, previous); } else { + spin_lock(&conf->device_lock); if (atomic_read(&sh->count)) { BUG_ON(!list_empty(&sh->lru) && !test_bit(STRIPE_EXPANDING, &sh->state) && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) - && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); + ); } else { - spin_lock(&conf->device_lock); if (!test_bit(STRIPE_HANDLE, &sh->state)) atomic_inc(&conf->active_stripes); - if (list_empty(&sh->lru) && - !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) && - !test_bit(STRIPE_EXPANDING, &sh->state)) - BUG(); + BUG_ON(list_empty(&sh->lru)); list_del_init(&sh->lru); if (sh->group) { sh->group->stripes_cnt--; sh->group = NULL; } - spin_unlock(&conf->device_lock); } + spin_unlock(&conf->device_lock); } } while (sh == NULL);