Merge branch 'md-next' of https://github.com/liu-song-6/linux into for-5.4/block

Pull MD changes from Song.

* 'md-next' of https://github.com/liu-song-6/linux:
  - raid1: factor out a common routine to handle the completion of sync write
  - md: don't call spare_active in md_reap_sync_thread if all member devices can't work
  - md: don't set In_sync if array is frozen
  - md: allow last device to be forcibly removed from RAID1/RAID10.
  - md: Convert to use int_pow()
  - md/raid10: end bio when the device faulty
  - md/raid1: end bio when the device faulty
  - md/raid6: Set R5_ReadError when there is read failure on parity disk
  - raid1: use an int as the return value of raise_barrier()
2019-08-07 12:26:53 -06:00 · 2019-08-07 12:26:53 -06:00 · e8fc87f6a9
parent 00ec4f3039 449808a254
commit e8fc87f6a9
5 changed files with 101 additions and 61 deletions
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@ -1826,8 +1826,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 				if (!(le32_to_cpu(sb->feature_map) &
 				      MD_FEATURE_RECOVERY_BITMAP))
 					rdev->saved_raid_disk = -1;
-			} else
+			} else {
-				set_bit(In_sync, &rdev->flags);
+				/*
 				 * If the array is FROZEN, then the device can't
 				 * be in_sync with rest of array.
 				 */
 				if (!test_bit(MD_RECOVERY_FROZEN,
 					      &mddev->recovery))
 					set_bit(In_sync, &rdev->flags);
 			}
 			rdev->raid_disk = role;
 			break;
 		}
@ -3664,11 +3671,7 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
 		return -EINVAL;
 	if (decimals < 0)
 		decimals = 0;
-	while (decimals < scale) {
+	*res = result * int_pow(10, scale - decimals);
 		result *= 10;
 		decimals ++;
 	}
 	*res = result;
 	return 0;
 }
@ -5182,6 +5185,34 @@ static struct md_sysfs_entry md_consistency_policy =
 __ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
       consistency_policy_store);
 /* sysfs "show" handler: format mddev->fail_last_dev as "%d\n" into page. */
 static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
 {
 	int enabled = mddev->fail_last_dev;
 	return sprintf(page, "%d\n", enabled);
 }
 /*
 * sysfs "store" handler for fail_last_dev.
 *
 * Parses buf as a boolean with kstrtobool().  Setting fail_last_dev to
 * true allows the last device to be forcibly removed from RAID1/RAID10.
 *
 * Returns len on success, or the error code from kstrtobool() when buf
 * cannot be parsed.
 */
 static ssize_t
 fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	bool enable;
 	int err = kstrtobool(buf, &enable);
 	if (err)
 		return err;
 	/* Only write the flag when the requested value differs. */
 	if (mddev->fail_last_dev != enable)
 		mddev->fail_last_dev = enable;
 	return len;
 }
 /*
 * sysfs attribute "fail_last_dev" with mode S_IRUGO | S_IWUSR; reads go
 * through fail_last_dev_show() and writes through fail_last_dev_store().
 */
 static struct md_sysfs_entry md_fail_last_dev =
 __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
       fail_last_dev_store);
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
 	&md_layout.attr,
@ -5198,6 +5229,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_array_size.attr,
 	&max_corr_read_errors.attr,
 	&md_consistency_policy.attr,
 	&md_fail_last_dev.attr,
 	NULL,
 };
@ -9043,7 +9075,8 @@ void md_reap_sync_thread(struct mddev *mddev)
 	/* resync has finished, collect result */
 	md_unregister_thread(&mddev->sync_thread);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    mddev->degraded != mddev->raid_disks) {
 		/* success...*/
 		/* activate any spares */
 		if (mddev->pers->spare_active(mddev)) {
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@ -487,6 +487,7 @@ struct mddev {
 	unsigned int			good_device_nr;	/* good device num within cluster raid */
 	bool	has_superblocks:1;
 	bool	fail_last_dev:1;
 };
 enum recovery_flags {
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@ -447,19 +447,21 @@ static void raid1_end_write_request(struct bio *bio)
 		    /* We never try FailFast to WriteMostly devices */
 		    !test_bit(WriteMostly, &rdev->flags)) {
 			md_error(r1_bio->mddev, rdev);
-			if (!test_bit(Faulty, &rdev->flags))
+		}
-				/* This is the only remaining device,
+
-				 * We need to retry the write without
+		/*
-				 * FailFast
+		 * When the device is faulty, it is not necessary to
-				 */
+		 * handle write error.
-				set_bit(R1BIO_WriteError, &r1_bio->state);
+		 * For failfast, this is the only remaining device,
-			else {
+		 * We need to retry the write without FailFast.
-				/* Finished with this branch */
+		 */
-				r1_bio->bios[mirror] = NULL;
+		if (!test_bit(Faulty, &rdev->flags))
 				to_put = bio;
 			}
 		} else
 			set_bit(R1BIO_WriteError, &r1_bio->state);
 		else {
 			/* Finished with this branch */
 			r1_bio->bios[mirror] = NULL;
 			to_put = bio;
 		}
 	} else {
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that we
@ -872,8 +874,11 @@ static void flush_pending_writes(struct r1conf *conf)
 * background IO calls must call raise_barrier.  Once that returns
 *    there is no normal IO happening.  It must arrange to call
 *    lower_barrier when the particular background IO completes.
 *
 * If resync/recovery is interrupted, returns -EINTR;
 * Otherwise, returns 0.
 */
-static sector_t raise_barrier(struct r1conf *conf, sector_t sector_nr)
+static int raise_barrier(struct r1conf *conf, sector_t sector_nr)
 {
 	int idx = sector_to_idx(sector_nr);
@ -1612,12 +1617,12 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
 	/*
 	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks, ignore the error, let the
+	 * else if it is the last working disks with "fail_last_dev == false",
-	 * next level up know.
+	 * ignore the error, let the next level up know.
 	 * else mark the drive as failed
 	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags)
+	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
 	    && (conf->raid_disks - mddev->degraded) == 1) {
 		/*
 		 * Don't fail the drive, act as though we were just a
@ -1901,6 +1906,22 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
 	} while (sectors_to_go > 0);
 }
 /*
 * Decrement r1_bio->remaining and, if it reached zero, complete the sync
 * write: hand the r1bio to reschedule_retry() when R1BIO_MadeGood or
 * R1BIO_WriteError is set, otherwise call put_buf() and report the
 * sector count to md_done_sync() together with uptodate.
 */
 static void put_sync_write_buf(struct r1bio *r1_bio, int uptodate)
 {
 	struct mddev *mddev;
 	int nr_sectors;
 	if (!atomic_dec_and_test(&r1_bio->remaining))
 		return;
 	if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
 	    test_bit(R1BIO_WriteError, &r1_bio->state)) {
 		reschedule_retry(r1_bio);
 		return;
 	}
 	/* Read these before put_buf(); r1_bio is not touched afterwards. */
 	mddev = r1_bio->mddev;
 	nr_sectors = r1_bio->sectors;
 	put_buf(r1_bio);
 	md_done_sync(mddev, nr_sectors, uptodate);
 }
 static void end_sync_write(struct bio *bio)
 {
 	int uptodate = !bio->bi_status;
@ -1927,16 +1948,7 @@ static void end_sync_write(struct bio *bio)
 		)
 		set_bit(R1BIO_MadeGood, &r1_bio->state);
-	if (atomic_dec_and_test(&r1_bio->remaining)) {
+	put_sync_write_buf(r1_bio, uptodate);
 		int s = r1_bio->sectors;
 		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
 		    test_bit(R1BIO_WriteError, &r1_bio->state))
 			reschedule_retry(r1_bio);
 		else {
 			put_buf(r1_bio);
 			md_done_sync(mddev, s, uptodate);
 		}
 	}
 }
 static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
@ -2219,17 +2231,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
 		generic_make_request(wbio);
 	}
-	if (atomic_dec_and_test(&r1_bio->remaining)) {
+	put_sync_write_buf(r1_bio, 1);
 		/* if we're here, all write(s) have completed, so clean up */
 		int s = r1_bio->sectors;
 		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
 		    test_bit(R1BIO_WriteError, &r1_bio->state))
 			reschedule_retry(r1_bio);
 		else {
 			put_buf(r1_bio);
 			md_done_sync(mddev, s, 1);
 		}
 	}
 }
 /*
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@ -465,19 +465,21 @@ static void raid10_end_write_request(struct bio *bio)
 			if (test_bit(FailFast, &rdev->flags) &&
 			    (bio->bi_opf & MD_FAILFAST)) {
 				md_error(rdev->mddev, rdev);
-				if (!test_bit(Faulty, &rdev->flags))
+			}
-					/* This is the only remaining device,
+
-					 * We need to retry the write without
+			/*
-					 * FailFast
+			 * When the device is faulty, it is not necessary to
-					 */
+			 * handle write error.
-					set_bit(R10BIO_WriteError, &r10_bio->state);
+			 * For failfast, this is the only remaining device,
-				else {
+			 * We need to retry the write without FailFast.
-					r10_bio->devs[slot].bio = NULL;
+			 */
-					to_put = bio;
+			if (!test_bit(Faulty, &rdev->flags))
 					dec_rdev = 1;
 				}
 			} else
 				set_bit(R10BIO_WriteError, &r10_bio->state);
 			else {
 				r10_bio->devs[slot].bio = NULL;
 				to_put = bio;
 				dec_rdev = 1;
 			}
 		}
 	} else {
 		/*
@ -1638,12 +1640,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 	/*
 	 * If it is not operational, then we have already marked it as dead
-	 * else if it is the last working disks, ignore the error, let the
+	 * else if it is the last working disks with "fail_last_dev == false",
-	 * next level up know.
+	 * ignore the error, let the next level up know.
 	 * else mark the drive as failed
 	 */
 	spin_lock_irqsave(&conf->device_lock, flags);
-	if (test_bit(In_sync, &rdev->flags)
+	if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
 	    && !enough(conf, rdev->raid_disk)) {
 		/*
 		 * Don't fail the drive, just return an IO error.
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@ -2558,7 +2558,9 @@ static void raid5_end_read_request(struct bio * bi)
 		    && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 			retry = 1;
 		if (retry)
-			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
+			if (sh->qd_idx >= 0 && sh->pd_idx == i)
 				set_bit(R5_ReadError, &sh->dev[i].flags);
 			else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
 				set_bit(R5_ReadError, &sh->dev[i].flags);
 				clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
 			} else