diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 188121ca00aa..703bbbc4f16f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -520,10 +520,12 @@ struct pool { struct bio_list deferred_bios; struct bio_list deferred_flush_bios; struct list_head prepared_mappings; + struct list_head prepared_discards; struct bio_list retry_on_resume_list; struct deferred_set shared_read_ds; + struct deferred_set all_io_ds; struct new_mapping *next_mapping; mempool_t *mapping_pool; @@ -621,6 +623,7 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev struct endio_hook { struct thin_c *tc; struct deferred_entry *shared_read_entry; + struct deferred_entry *all_io_entry; struct new_mapping *overwrite_mapping; }; @@ -728,11 +731,12 @@ struct new_mapping { unsigned quiesced:1; unsigned prepared:1; + unsigned pass_discard:1; struct thin_c *tc; dm_block_t virt_block; dm_block_t data_block; - struct cell *cell; + struct cell *cell, *cell2; int err; /* @@ -872,7 +876,30 @@ static void process_prepared_mapping(struct new_mapping *m) mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_mappings(struct pool *pool) +static void process_prepared_discard(struct new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + + r = dm_thin_remove_block(tc->td, m->virt_block); + if (r) + DMERR("dm_thin_remove_block() failed"); + + /* + * Pass the discard down to the underlying device? + */ + if (m->pass_discard) + remap_and_issue(tc, m->bio, m->data_block); + else + bio_endio(m->bio, 0); + + cell_defer_except(tc, m->cell); + cell_defer_except(tc, m->cell2); + mempool_free(m, tc->pool->mapping_pool); +} + +static void process_prepared(struct pool *pool, struct list_head *head, + void (*fn)(struct new_mapping *)) { unsigned long flags; struct list_head maps; @@ -880,21 +907,27 @@ static void process_prepared_mappings(struct pool *pool) INIT_LIST_HEAD(&maps); spin_lock_irqsave(&pool->lock, flags); - list_splice_init(&pool->prepared_mappings, &maps); + list_splice_init(head, &maps); spin_unlock_irqrestore(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &maps, list) - process_prepared_mapping(m); + fn(m); } /* * Deferred bio jobs. */ +static int io_overlaps_block(struct pool *pool, struct bio *bio) +{ + return !(bio->bi_sector & pool->offset_mask) && + (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + +} + static int io_overwrites_block(struct pool *pool, struct bio *bio) { - return ((bio_data_dir(bio) == WRITE) && - !(bio->bi_sector & pool->offset_mask)) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + return (bio_data_dir(bio) == WRITE) && + io_overlaps_block(pool, bio); } static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, @@ -1134,6 +1167,86 @@ static void no_space(struct cell *cell) retry_on_resume(bio); } +static void process_discard(struct thin_c *tc, struct bio *bio) +{ + int r; + struct pool *pool = tc->pool; + struct cell *cell, *cell2; + struct cell_key key, key2; + dm_block_t block = get_bio_block(tc, bio); + struct dm_thin_lookup_result lookup_result; + struct new_mapping *m; + + build_virtual_key(tc->td, block, &key); + if (bio_detain(tc->pool->prison, &key, bio, &cell)) + return; + + r = dm_thin_find_block(tc->td, block, 1, &lookup_result); + switch (r) { + case 0: + /* + * Check nobody is fiddling with this pool block. This can + * happen if someone's in the process of breaking sharing + * on this block. + */ + build_data_key(tc->td, lookup_result.block, &key2); + if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) { + cell_release_singleton(cell, bio); + break; + } + + if (io_overlaps_block(pool, bio)) { + /* + * IO may still be going to the destination block. We must + * quiesce before we can do the removal. + */ + m = get_next_mapping(pool); + m->tc = tc; + m->pass_discard = !lookup_result.shared; + m->virt_block = block; + m->data_block = lookup_result.block; + m->cell = cell; + m->cell2 = cell2; + m->err = 0; + m->bio = bio; + + if (!ds_add_work(&pool->all_io_ds, &m->list)) { + list_add(&m->list, &pool->prepared_discards); + wake_worker(pool); + } + } else { + /* + * This path is hit if people are ignoring + * limits->discard_granularity. It ignores any + * part of the discard that is in a subsequent + * block. + */ + sector_t offset = bio->bi_sector - (block << pool->block_shift); + unsigned remaining = (pool->sectors_per_block - offset) << 9; + bio->bi_size = min(bio->bi_size, remaining); + + cell_release_singleton(cell, bio); + cell_release_singleton(cell2, bio); + remap_and_issue(tc, bio, lookup_result.block); + } + break; + + case -ENODATA: + /* + * It isn't provisioned, just forget it. + */ + cell_release_singleton(cell, bio); + bio_endio(bio, 0); + break; + + default: + DMERR("discard: find block unexpectedly returned %d", r); + cell_release_singleton(cell, bio); + bio_io_error(bio); + break; + } +} + static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, struct cell_key *key, struct dm_thin_lookup_result *lookup_result, @@ -1279,6 +1392,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) default: DMERR("dm_thin_find_block() failed, error = %d", r); + cell_release_singleton(cell, bio); bio_io_error(bio); break; } @@ -1320,7 +1434,11 @@ static void process_deferred_bios(struct pool *pool) break; } - process_bio(tc, bio); + + if (bio->bi_rw & REQ_DISCARD) + process_discard(tc, bio); + else + process_bio(tc, bio); } /* @@ -1354,7 +1472,8 @@ static void do_worker(struct work_struct *ws) { struct pool *pool = container_of(ws, struct pool, worker); - process_prepared_mappings(pool); + process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); + process_prepared(pool, &pool->prepared_discards, process_prepared_discard); process_deferred_bios(pool); } @@ -1397,6 +1516,7 @@ static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) h->tc = tc; h->shared_read_entry = NULL; + h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds); h->overwrite_mapping = NULL; return h; @@ -1415,7 +1535,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, struct dm_thin_lookup_result result; map_context->ptr = thin_hook_bio(tc, bio); - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { thin_defer_bio(tc, bio); return DM_MAPIO_SUBMITTED; } @@ -1591,10 +1711,12 @@ static struct pool *pool_create(struct mapped_device *pool_md, bio_list_init(&pool->deferred_bios); bio_list_init(&pool->deferred_flush_bios); INIT_LIST_HEAD(&pool->prepared_mappings); + INIT_LIST_HEAD(&pool->prepared_discards); pool->low_water_triggered = 0; pool->no_free_space = 0; bio_list_init(&pool->retry_on_resume_list); ds_init(&pool->shared_read_ds); + ds_init(&pool->all_io_ds); pool->next_mapping = NULL; pool->mapping_pool = @@ -1834,7 +1956,8 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->zero_new_blocks = pf.zero_new_blocks; ti->num_flush_requests = 1; - ti->num_discard_requests = 0; + ti->num_discard_requests = 1; + ti->discards_supported = 1; ti->private = pt; pt->callbacks.congested_fn = pool_is_congested; @@ -2227,6 +2350,17 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } +static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +{ + limits->max_discard_sectors = pool->sectors_per_block; + + /* + * This is just a hint, and not enforced. We have to cope with + * bios that overlap 2 blocks. + */ + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; +} + static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct pool_c *pt = ti->private; @@ -2234,6 +2368,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); + set_discard_limits(pool, limits); } static struct target_type pool_target = { @@ -2350,8 +2485,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->split_io = tc->pool->sectors_per_block; ti->num_flush_requests = 1; - ti->num_discard_requests = 0; - ti->discards_supported = 0; + ti->num_discard_requests = 1; + ti->discards_supported = 1; dm_put(pool_md); @@ -2407,6 +2542,13 @@ static int thin_endio(struct dm_target *ti, spin_unlock_irqrestore(&pool->lock, flags); } + if (h->all_io_entry) { + INIT_LIST_HEAD(&work); + ds_dec(h->all_io_entry, &work); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + } + mempool_free(h, pool->endio_hook_pool); return 0; @@ -2487,9 +2629,11 @@ static int thin_iterate_devices(struct dm_target *ti, static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; + struct pool *pool = tc->pool; blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT); + blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); + set_discard_limits(pool, limits); } static struct target_type thin_target = {