diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst index ec62fcc8eece..6cf8adc86fa8 100644 --- a/Documentation/admin-guide/device-mapper/index.rst +++ b/Documentation/admin-guide/device-mapper/index.rst @@ -11,6 +11,7 @@ Device Mapper dm-clone dm-crypt dm-dust + dm-ebs dm-flakey dm-init dm-integrity diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ac83f5002ce5..489935d5f22d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1471,7 +1471,7 @@ static void retrieve_deps(struct dm_table *table, /* * Check we have enough space. */ - needed = sizeof(*deps) + (sizeof(*deps->dev) * count); + needed = struct_size(deps, dev, count); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; return; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 74f3c506f084..30505d70f423 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -282,6 +282,8 @@ static int persistent_memory_claim(struct dm_writecache *wc) while (daa-- && i < p) { pages[i++] = pfn_t_to_page(pfn); pfn.val++; + if (!(i & 15)) + cond_resched(); } } while (i < p); wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); @@ -849,10 +851,14 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ if (likely(!e->write_in_progress)) { if (!discarded_something) { - writecache_wait_for_ios(wc, READ); - writecache_wait_for_ios(wc, WRITE); + if (!WC_MODE_PMEM(wc)) { + writecache_wait_for_ios(wc, READ); + writecache_wait_for_ios(wc, WRITE); + } discarded_something = true; } + if (!writecache_entry_is_committed(wc, e)) + wc->uncommitted_blocks--; writecache_free_entry(wc, e); } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 130b5a6d9f12..5cf6f5f552e0 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1078,7 +1078,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) >> zmd->zone_nr_blocks_shift; if (!nr_meta_zones || - nr_meta_zones >= zmd->nr_rnd_zones) { + (zmd->nr_devs <= 1 && nr_meta_zones >= zmd->nr_rnd_zones) || + (zmd->nr_devs > 1 && nr_meta_zones >= zmd->nr_cache_zones)) { dmz_dev_err(dev, "Invalid number of metadata blocks"); return -ENXIO; } @@ -1949,7 +1950,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int idx, bool idle) { struct dm_zone *dzone = NULL; - struct dm_zone *zone, *last = NULL; + struct dm_zone *zone, *maxw_z = NULL; struct list_head *zone_list; /* If we have cache zones select from the cache zone list */ @@ -1961,18 +1962,37 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, } else zone_list = &zmd->dev[idx].map_rnd_list; + /* + * Find the buffer zone with the heaviest weight or the first (oldest) + * data zone that can be reclaimed. + */ list_for_each_entry(zone, zone_list, link) { if (dmz_is_buf(zone)) { dzone = zone->bzone; - if (dzone->dev->dev_idx != idx) + if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx) continue; - if (!last) { - last = dzone; + if (!maxw_z || maxw_z->weight < dzone->weight) + maxw_z = dzone; + } else { + dzone = zone; + if (dmz_lock_zone_reclaim(dzone)) + return dzone; + } + } + + if (maxw_z && dmz_lock_zone_reclaim(maxw_z)) + return maxw_z; + + /* + * If we come here, none of the zones inspected could be locked for + * reclaim. Try again, being more aggressive, that is, find the + * first zone that can be reclaimed regardless of its weitght. + */ + list_for_each_entry(zone, zone_list, link) { + if (dmz_is_buf(zone)) { + dzone = zone->bzone; + if (dmz_is_rnd(dzone) && dzone->dev->dev_idx != idx) continue; - } - if (last->weight < dzone->weight) - continue; - dzone = last; } else dzone = zone; if (dmz_lock_zone_reclaim(dzone)) @@ -2006,7 +2026,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int dev_idx, bool idle) { - struct dm_zone *zone; + struct dm_zone *zone = NULL; /* * Search for a zone candidate to reclaim: 2 cases are possible. @@ -2019,7 +2039,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, dmz_lock_map(zmd); if (list_empty(&zmd->reserved_seq_zones_list)) zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx); - else + if (!zone) zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle); dmz_unlock_map(zmd); diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 2261b4dd60b7..dd1eebf6e50f 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -377,6 +377,7 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) dmz_metadata_label(zmd), zrc->dev_idx); return -EBUSY; } + rzone = dzone; start = jiffies; if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) { @@ -391,8 +392,6 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) */ ret = dmz_reclaim_rnd_data(zrc, dzone); } - rzone = dzone; - } else { struct dm_zone *bzone = dzone->bzone; sector_t chunk_block = 0; @@ -415,7 +414,6 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc) * be later reclaimed. */ ret = dmz_reclaim_seq_data(zrc, dzone); - rzone = dzone; } } out: diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index a907a9446c0b..cf915009c306 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -890,7 +890,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) } /* Set target (no write same support) */ - ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9; + ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata); ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_zeroes_bios = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 109e81f33edb..e6807792fec8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1009,6 +1009,7 @@ static void clone_endio(struct bio *bio) struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; + struct bio *orig_bio = io->orig_bio; if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { if (bio_op(bio) == REQ_OP_DISCARD && @@ -1022,6 +1023,18 @@ static void clone_endio(struct bio *bio) disable_write_zeroes(md); } + /* + * For zone-append bios get offset in zone of the written + * sector and add that to the original bio sector pos. + */ + if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) { + sector_t written_sector = bio->bi_iter.bi_sector; + struct request_queue *q = orig_bio->bi_disk->queue; + u64 mask = (u64)blk_queue_zone_sectors(q) - 1; + + orig_bio->bi_iter.bi_sector += written_sector & mask; + } + if (endio) { int r = endio(tio->ti, bio, &error); switch (r) {