dm zoned: reduce overhead of backing device checks
commit 5.4-rM2-2.2.x-imx-squashed e7fad909b6
upstream. Commit 75d66ffb48
added backing device health checks and as a part of these checks, check_events() block ops template call is invoked in dm-zoned mapping path as well as in reclaim and flush path. Calling check_events() with ATA or SCSI backing devices introduces a blocking scsi_test_unit_ready() call being made in sd_check_events(). Even though the overhead of calling scsi_test_unit_ready() is small for ATA zoned devices, it is much larger for SCSI and it affects performance in a very negative way. Fix this performance regression by executing check_events() only in case of any I/O errors. The function dmz_bdev_is_dying() is modified to call only blk_queue_dying(), while calls to check_events() are made in a new helper function, dmz_check_bdev(). Reported-by: zhangxiaoxu <zhangxiaoxu5@huawei.com> Fixes: 75d66ffb48
("dm zoned: properly handle backing device failure") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent
26fe630624
commit
fca436251d
|
@ -554,6 +554,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
|
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
|
||||||
dmz_release_mblock(zmd, mblk);
|
dmz_release_mblock(zmd, mblk);
|
||||||
|
dmz_check_bdev(zmd->dev);
|
||||||
return ERR_PTR(-EIO);
|
return ERR_PTR(-EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -625,6 +626,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
|
||||||
ret = submit_bio_wait(bio);
|
ret = submit_bio_wait(bio);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
dmz_check_bdev(zmd->dev);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -691,6 +694,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
|
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
|
||||||
clear_bit(DMZ_META_ERROR, &mblk->state);
|
clear_bit(DMZ_META_ERROR, &mblk->state);
|
||||||
|
dmz_check_bdev(zmd->dev);
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
}
|
}
|
||||||
nr_mblks_submitted--;
|
nr_mblks_submitted--;
|
||||||
|
@ -768,7 +772,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||||
/* If there are no dirty metadata blocks, just flush the device cache */
|
/* If there are no dirty metadata blocks, just flush the device cache */
|
||||||
if (list_empty(&write_list)) {
|
if (list_empty(&write_list)) {
|
||||||
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
|
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
|
||||||
goto out;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -778,7 +782,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||||
*/
|
*/
|
||||||
ret = dmz_log_dirty_mblocks(zmd, &write_list);
|
ret = dmz_log_dirty_mblocks(zmd, &write_list);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto err;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The log is on disk. It is now safe to update in place
|
* The log is on disk. It is now safe to update in place
|
||||||
|
@ -786,11 +790,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||||
*/
|
*/
|
||||||
ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
|
ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto err;
|
||||||
|
|
||||||
ret = dmz_write_sb(zmd, zmd->mblk_primary);
|
ret = dmz_write_sb(zmd, zmd->mblk_primary);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto err;
|
||||||
|
|
||||||
while (!list_empty(&write_list)) {
|
while (!list_empty(&write_list)) {
|
||||||
mblk = list_first_entry(&write_list, struct dmz_mblock, link);
|
mblk = list_first_entry(&write_list, struct dmz_mblock, link);
|
||||||
|
@ -805,16 +809,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
|
||||||
|
|
||||||
zmd->sb_gen++;
|
zmd->sb_gen++;
|
||||||
out:
|
out:
|
||||||
if (ret && !list_empty(&write_list)) {
|
|
||||||
spin_lock(&zmd->mblk_lock);
|
|
||||||
list_splice(&write_list, &zmd->mblk_dirty_list);
|
|
||||||
spin_unlock(&zmd->mblk_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
dmz_unlock_flush(zmd);
|
dmz_unlock_flush(zmd);
|
||||||
up_write(&zmd->mblk_sem);
|
up_write(&zmd->mblk_sem);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
err:
|
||||||
|
if (!list_empty(&write_list)) {
|
||||||
|
spin_lock(&zmd->mblk_lock);
|
||||||
|
list_splice(&write_list, &zmd->mblk_dirty_list);
|
||||||
|
spin_unlock(&zmd->mblk_lock);
|
||||||
|
}
|
||||||
|
if (!dmz_check_bdev(zmd->dev))
|
||||||
|
ret = -EIO;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1244,6 +1252,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
|
||||||
if (ret) {
|
if (ret) {
|
||||||
dmz_dev_err(zmd->dev, "Get zone %u report failed",
|
dmz_dev_err(zmd->dev, "Get zone %u report failed",
|
||||||
dmz_id(zmd, zone));
|
dmz_id(zmd, zone));
|
||||||
|
dmz_check_bdev(zmd->dev);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
|
||||||
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
|
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
|
||||||
dmz_id(zmd, zone), (unsigned long long)wp_block,
|
dmz_id(zmd, zone), (unsigned long long)wp_block,
|
||||||
(unsigned long long)block, nr_blocks, ret);
|
(unsigned long long)block, nr_blocks, ret);
|
||||||
|
dmz_check_bdev(zrc->dev);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -489,12 +490,7 @@ static void dmz_reclaim_work(struct work_struct *work)
|
||||||
ret = dmz_do_reclaim(zrc);
|
ret = dmz_do_reclaim(zrc);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
|
dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
|
||||||
if (ret == -EIO)
|
if (!dmz_check_bdev(zrc->dev))
|
||||||
/*
|
|
||||||
* LLD might be performing some error handling sequence
|
|
||||||
* at the underlying device. To not interfere, do not
|
|
||||||
* attempt to schedule the next reclaim run immediately.
|
|
||||||
*/
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,6 +80,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
|
||||||
|
|
||||||
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
|
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
|
||||||
bio->bi_status = status;
|
bio->bi_status = status;
|
||||||
|
if (bio->bi_status != BLK_STS_OK)
|
||||||
|
bioctx->target->dev->flags |= DMZ_CHECK_BDEV;
|
||||||
|
|
||||||
if (refcount_dec_and_test(&bioctx->ref)) {
|
if (refcount_dec_and_test(&bioctx->ref)) {
|
||||||
struct dm_zone *zone = bioctx->zone;
|
struct dm_zone *zone = bioctx->zone;
|
||||||
|
@ -565,31 +567,51 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check the backing device availability. If it's on the way out,
|
* Check if the backing device is being removed. If it's on the way out,
|
||||||
* start failing I/O. Reclaim and metadata components also call this
|
* start failing I/O. Reclaim and metadata components also call this
|
||||||
* function to cleanly abort operation in the event of such failure.
|
* function to cleanly abort operation in the event of such failure.
|
||||||
*/
|
*/
|
||||||
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
|
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
|
||||||
{
|
{
|
||||||
struct gendisk *disk;
|
if (dmz_dev->flags & DMZ_BDEV_DYING)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
|
if (dmz_dev->flags & DMZ_CHECK_BDEV)
|
||||||
disk = dmz_dev->bdev->bd_disk;
|
return !dmz_check_bdev(dmz_dev);
|
||||||
if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
|
|
||||||
dmz_dev_warn(dmz_dev, "Backing device queue dying");
|
if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
|
||||||
dmz_dev->flags |= DMZ_BDEV_DYING;
|
dmz_dev_warn(dmz_dev, "Backing device queue dying");
|
||||||
} else if (disk->fops->check_events) {
|
dmz_dev->flags |= DMZ_BDEV_DYING;
|
||||||
if (disk->fops->check_events(disk, 0) &
|
|
||||||
DISK_EVENT_MEDIA_CHANGE) {
|
|
||||||
dmz_dev_warn(dmz_dev, "Backing device offline");
|
|
||||||
dmz_dev->flags |= DMZ_BDEV_DYING;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return dmz_dev->flags & DMZ_BDEV_DYING;
|
return dmz_dev->flags & DMZ_BDEV_DYING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the backing device availability. This detects such events as
|
||||||
|
* backing device going offline due to errors, media removals, etc.
|
||||||
|
* This check is less efficient than dmz_bdev_is_dying() and should
|
||||||
|
* only be performed as a part of error handling.
|
||||||
|
*/
|
||||||
|
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
|
||||||
|
{
|
||||||
|
struct gendisk *disk;
|
||||||
|
|
||||||
|
dmz_dev->flags &= ~DMZ_CHECK_BDEV;
|
||||||
|
|
||||||
|
if (dmz_bdev_is_dying(dmz_dev))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
disk = dmz_dev->bdev->bd_disk;
|
||||||
|
if (disk->fops->check_events &&
|
||||||
|
disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
|
||||||
|
dmz_dev_warn(dmz_dev, "Backing device offline");
|
||||||
|
dmz_dev->flags |= DMZ_BDEV_DYING;
|
||||||
|
}
|
||||||
|
|
||||||
|
return !(dmz_dev->flags & DMZ_BDEV_DYING);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Process a new BIO.
|
* Process a new BIO.
|
||||||
*/
|
*/
|
||||||
|
@ -902,8 +924,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
|
||||||
{
|
{
|
||||||
struct dmz_target *dmz = ti->private;
|
struct dmz_target *dmz = ti->private;
|
||||||
|
|
||||||
if (dmz_bdev_is_dying(dmz->dev))
|
if (!dmz_check_bdev(dmz->dev))
|
||||||
return -ENODEV;
|
return -EIO;
|
||||||
|
|
||||||
*bdev = dmz->dev->bdev;
|
*bdev = dmz->dev->bdev;
|
||||||
|
|
||||||
|
|
|
@ -72,6 +72,7 @@ struct dmz_dev {
|
||||||
|
|
||||||
/* Device flags. */
|
/* Device flags. */
|
||||||
#define DMZ_BDEV_DYING (1 << 0)
|
#define DMZ_BDEV_DYING (1 << 0)
|
||||||
|
#define DMZ_CHECK_BDEV (2 << 0)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Zone descriptor.
|
* Zone descriptor.
|
||||||
|
@ -255,5 +256,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
|
||||||
* Functions defined in dm-zoned-target.c
|
* Functions defined in dm-zoned-target.c
|
||||||
*/
|
*/
|
||||||
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
|
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
|
||||||
|
bool dmz_check_bdev(struct dmz_dev *dmz_dev);
|
||||||
|
|
||||||
#endif /* DM_ZONED_H */
|
#endif /* DM_ZONED_H */
|
||||||
|
|
Loading…
Reference in New Issue