md/raid10: Refactor raid10_make_request

Refactor raid10_make_request into seperate read and write functions to clean up the code. Shaohua: add the recovery check back to read path Signed-off-by: Robert LeBlanc <robert@leblancnet.us> Signed-off-by: Shaohua Li <shli@fb.com>
2016-12-05 13:02:58 -07:00 · 2016-12-05 13:02:58 -07:00 · bb5f1ed70b
parent 3b046a97cb
commit bb5f1ed70b
1 changed files with 140 additions and 105 deletions
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@ -1087,25 +1087,18 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }

-static void __make_request(struct mddev *mddev, struct bio *bio)
+static void raid10_read_request(struct mddev *mddev, struct bio *bio,
+				struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	struct r10bio *r10_bio;
 	struct bio *read_bio;
-	int i;
 	const int op = bio_op(bio);
-	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
-	unsigned long flags;
-	struct md_rdev *blocked_rdev;
-	struct blk_plug_cb *cb;
-	struct raid10_plug_cb *plug = NULL;
 	int sectors_handled;
 	int max_sectors;
-	int sectors;
-
-	md_write_start(mddev, bio);
+	sector_t sectors;
+	struct md_rdev *rdev;
+	int slot;

 	/*
 	 * Register the new request and wait if the reconstruction
@ -1118,8 +1111,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
 	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/* IO spans the reshape position.  Need to wait for
-		 * reshape to pass
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
 		 */
 		raid10_log(conf->mddev, "wait reshape");
 		allow_barrier(conf);
@ -1129,50 +1123,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 			   sectors);
 		wait_barrier(conf);
 	}
-	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio_data_dir(bio) == WRITE &&
-	    (mddev->reshape_backwards
-	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
-		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_iter.bi_sector < conf->reshape_progress))) {
-		/* Need to update reshape_position in metadata */
-		mddev->reshape_position = conf->reshape_progress;
-		set_mask_bits(&mddev->sb_flags, 0,
-			      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		raid10_log(conf->mddev, "wait reshape metadata");
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
-
-		conf->reshape_safe = mddev->reshape_position;
-	}
-
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
-
-	r10_bio->master_bio = bio;
-	r10_bio->sectors = sectors;
-
-	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_iter.bi_sector;
-	r10_bio->state = 0;
-
-	/* We might need to issue multiple reads to different
-	 * devices if there are bad blocks around, so we keep
-	 * track of the number of reads in bio->bi_phys_segments.
-	 * If this is 0, there is only one r10_bio and no locking
-	 * will be needed when the request completes.  If it is
-	 * non-zero, then it is the number of not-completed requests.
-	 */
-	bio->bi_phys_segments = 0;
-	bio_clear_flag(bio, BIO_SEG_VALID);
-
-	if (rw == READ) {
-		/*
-		 * read balancing logic:
-		 */
-		struct md_rdev *rdev;
-		int slot;

 read_again:
 	rdev = read_balance(conf, r10_bio, &max_sectors);
@ -1204,8 +1154,9 @@ read_again:
 	                              read_bio, disk_devt(mddev->gendisk),
 	                              r10_bio->sector);
 	if (max_sectors < r10_bio->sectors) {
-			/* Could not read all from this device, so we will
-			 * need another r10_bio.
+		/*
+		 * Could not read all from this device, so we will need another
+		 * r10_bio.
 		 */
 		sectors_handled = (r10_bio->sector + max_sectors
 				   - bio->bi_iter.bi_sector);
@ -1216,10 +1167,11 @@ read_again:
 		else
 			bio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
-			/* Cannot call generic_make_request directly
-			 * as that will be queued in __generic_make_request
-			 * and subsequent mempool_alloc might block
-			 * waiting for it.  so hand bio over to raid10d.
+		/*
+		 * Cannot call generic_make_request directly as that will be
+		 * queued in __generic_make_request and subsequent
+		 * mempool_alloc might block waiting for it.  so hand bio over
+		 * to raid10d.
 		 */
 		reschedule_retry(r10_bio);

@ -1229,17 +1181,73 @@ read_again:
 		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 		r10_bio->state = 0;
 		r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_iter.bi_sector +
-				sectors_handled;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
 		goto read_again;
 	} else
 		generic_make_request(read_bio);
 	return;
 }

+static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+				 struct r10bio *r10_bio)
+{
+	struct r10conf *conf = mddev->private;
+	int i;
+	const int op = bio_op(bio);
+	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
+	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
+	unsigned long flags;
+	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid10_plug_cb *plug = NULL;
+	sector_t sectors;
+	int sectors_handled;
+	int max_sectors;
+
+	md_write_start(mddev, bio);
+
 	/*
-	 * WRITE:
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
 	 */
+	wait_barrier(conf);
+
+	sectors = bio_sectors(bio);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		/*
+		 * IO spans the reshape position.  Need to wait for reshape to
+		 * pass
+		 */
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    (mddev->reshape_backwards
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
+		/* Need to update reshape_position in metadata */
+		mddev->reshape_position = conf->reshape_progress;
+		set_mask_bits(&mddev->sb_flags, 0,
+			      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
+		md_wakeup_thread(mddev->thread);
+		raid10_log(conf->mddev, "wait reshape metadata");
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
+		conf->reshape_safe = mddev->reshape_position;
+	}
+
 	if (conf->pending_count >= max_queued_requests) {
 		md_wakeup_thread(mddev->thread);
 		raid10_log(mddev, "wait queued");
@ -1300,8 +1308,7 @@ retry_write:
 			int bad_sectors;
 			int is_bad;

-			is_bad = is_badblock(rdev, dev_sector,
-					     max_sectors,
+			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
 			if (is_bad < 0) {
 				/* Mustn't write here until the bad block
@ -1405,8 +1412,7 @@ retry_write:
 			r10_bio->devs[i].bio = mbio;

 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
-					   choose_data_offset(r10_bio,
-							      rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@ -1457,8 +1463,7 @@ retry_write:
 			r10_bio->devs[i].repl_bio = mbio;

 			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
-					   choose_data_offset(
-						   r10_bio, rdev));
+					   choose_data_offset(r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
 			mbio->bi_end_io	= raid10_end_write_request;
 			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@ -1503,6 +1508,36 @@ retry_write:
 	one_write_done(r10_bio);
 }

+static void __make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	struct r10bio *r10_bio;
+
+	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+	r10_bio->master_bio = bio;
+	r10_bio->sectors = bio_sectors(bio);
+
+	r10_bio->mddev = mddev;
+	r10_bio->sector = bio->bi_iter.bi_sector;
+	r10_bio->state = 0;
+
+	/*
+	 * We might need to issue multiple reads to different devices if there
+	 * are bad blocks around, so we keep track of the number of reads in
+	 * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
+	 * no locking will be needed when the request completes.  If it is
+	 * non-zero, then it is the number of not-completed requests.
+	 */
+	bio->bi_phys_segments = 0;
+	bio_clear_flag(bio, BIO_SEG_VALID);
+
+	if (bio_data_dir(bio) == READ)
+		raid10_read_request(mddev, bio, r10_bio);
+	else
+		raid10_write_request(mddev, bio, r10_bio);
+}
+
 static void raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;