diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt index 33d45ee0b737..ff6639f72536 100644 --- a/Documentation/device-mapper/cache.txt +++ b/Documentation/device-mapper/cache.txt @@ -68,10 +68,11 @@ So large block sizes are bad because they waste cache space. And small block sizes are bad because they increase the amount of metadata (both in core and on disk). -Writeback/writethrough ----------------------- +Cache operating modes +--------------------- -The cache has two modes, writeback and writethrough. +The cache has three operating modes: writeback, writethrough and +passthrough. If writeback, the default, is selected then a write to a block that is cached will go only to the cache and the block will be marked dirty in @@ -81,6 +82,18 @@ If writethrough is selected then a write to a cached block will not complete until it has hit both the origin and cache devices. Clean blocks should remain clean. +If passthrough is selected, useful when the cache contents are not known +to be coherent with the origin device, then all reads are served from +the origin device (all reads miss the cache) and all writes are +forwarded to the origin device; additionally, write hits cause cache +block invalidates. Passthrough mode allows a cache device to be +activated without having to worry about coherency. Coherency that +exists is maintained, although the cache will gradually cool as writes +take place. If the coherency of the cache can later be verified, or +established, the cache device can can be transitioned to writethrough or +writeback mode while still warm. Otherwise, the cache contents can be +discarded prior to transitioning to the desired operating mode. + A simple cleaner policy is provided, which will clean (write back) all dirty blocks in a cache. Useful for decommissioning a cache. diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 062b83ed3e84..8601425436cd 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1249,3 +1249,8 @@ int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, return r; } + +int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) +{ + return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); +} diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index f45cef21f3d0..cd906f14f98d 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -137,6 +137,11 @@ int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy * int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, uint32_t hint); +/* + * Query method. Are all the blocks in the cache clean? + */ +int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result); + /*----------------------------------------------------------------*/ #endif /* DM_CACHE_METADATA_H */ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 183dfc9db297..8c0217753cc5 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -104,14 +104,37 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) /* * FIXME: the cache is read/write for the time being. */ -enum cache_mode { +enum cache_metadata_mode { CM_WRITE, /* metadata may be changed */ CM_READ_ONLY, /* metadata may not be changed */ }; +enum cache_io_mode { + /* + * Data is written to cached blocks only. These blocks are marked + * dirty. If you lose the cache device you will lose data. + * Potential performance increase for both reads and writes. + */ + CM_IO_WRITEBACK, + + /* + * Data is written to both cache and origin. Blocks are never + * dirty. Potential performance benfit for reads only. + */ + CM_IO_WRITETHROUGH, + + /* + * A degraded mode useful for various cache coherency situations + * (eg, rolling back snapshots). Reads and writes always go to the + * origin. If a write goes to a cached oblock, then the cache + * block is invalidated. + */ + CM_IO_PASSTHROUGH +}; + struct cache_features { - enum cache_mode mode; - bool write_through:1; + enum cache_metadata_mode mode; + enum cache_io_mode io_mode; }; struct cache_stats { @@ -565,9 +588,24 @@ static void save_stats(struct cache *cache) #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) +static bool writethrough_mode(struct cache_features *f) +{ + return f->io_mode == CM_IO_WRITETHROUGH; +} + +static bool writeback_mode(struct cache_features *f) +{ + return f->io_mode == CM_IO_WRITEBACK; +} + +static bool passthrough_mode(struct cache_features *f) +{ + return f->io_mode == CM_IO_PASSTHROUGH; +} + static size_t get_per_bio_data_size(struct cache *cache) { - return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; + return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; } static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) @@ -1135,6 +1173,32 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, quiesce_migration(mg); } +/* + * Invalidate a cache entry. No writeback occurs; any changes in the cache + * block are thrown away. + */ +static void invalidate(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = true; + mg->promote = false; + mg->requeue_holder = true; + mg->cache = cache; + mg->old_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = cell; + mg->new_ocell = NULL; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + /*---------------------------------------------------------------- * bio processing *--------------------------------------------------------------*/ @@ -1197,13 +1261,6 @@ static bool spare_migration_bandwidth(struct cache *cache) return current_volume < cache->migration_threshold; } -static bool is_writethrough_io(struct cache *cache, struct bio *bio, - dm_cblock_t cblock) -{ - return bio_data_dir(bio) == WRITE && - cache->features.write_through && !is_dirty(cache, cblock); -} - static void inc_hit_counter(struct cache *cache, struct bio *bio) { atomic_inc(bio_data_dir(bio) == READ ? @@ -1216,6 +1273,15 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } +static void issue_cache_bio(struct cache *cache, struct bio *bio, + struct per_bio_data *pb, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_cache_dirty(cache, bio, oblock, cblock); + issue(cache, bio); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { @@ -1227,7 +1293,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); - bool can_migrate = discarded_block || spare_migration_bandwidth(cache); + bool passthrough = passthrough_mode(&cache->features); + bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); /* * Check to see if that block is currently migrating. @@ -1248,15 +1315,39 @@ static void process_bio(struct cache *cache, struct prealloc *structs, switch (lookup_result.op) { case POLICY_HIT: - inc_hit_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + if (passthrough) { + inc_miss_counter(cache, bio); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) - remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - else - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + /* + * Passthrough always maps to the origin, + * invalidating any cache blocks that are written + * to. + */ + + if (bio_data_dir(bio) == WRITE) { + atomic_inc(&cache->stats.demotion); + invalidate(cache, structs, block, lookup_result.cblock, new_ocell); + release_cell = false; + + } else { + /* FIXME: factor out issue_origin() */ + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); + } + } else { + inc_hit_counter(cache, bio); + + if (bio_data_dir(bio) == WRITE && + writethrough_mode(&cache->features) && + !is_dirty(cache, lookup_result.cblock)) { + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + issue(cache, bio); + } else + issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); + } - issue(cache, bio); break; case POLICY_MISS: @@ -1807,7 +1898,7 @@ static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, static void init_features(struct cache_features *cf) { cf->mode = CM_WRITE; - cf->write_through = false; + cf->io_mode = CM_IO_WRITEBACK; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, @@ -1832,10 +1923,13 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as, arg = dm_shift_arg(as); if (!strcasecmp(arg, "writeback")) - cf->write_through = false; + cf->io_mode = CM_IO_WRITEBACK; else if (!strcasecmp(arg, "writethrough")) - cf->write_through = true; + cf->io_mode = CM_IO_WRITETHROUGH; + + else if (!strcasecmp(arg, "passthrough")) + cf->io_mode = CM_IO_PASSTHROUGH; else { *error = "Unrecognised cache feature requested"; @@ -2088,6 +2182,22 @@ static int cache_create(struct cache_args *ca, struct cache **result) } cache->cmd = cmd; + if (passthrough_mode(&cache->features)) { + bool all_clean; + + r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); + if (r) { + *error = "dm_cache_metadata_all_clean() failed"; + goto bad; + } + + if (!all_clean) { + *error = "Cannot enter passthrough mode unless all blocks are clean"; + r = -EINVAL; + goto bad; + } + } + spin_lock_init(&cache->lock); bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_flush_bios); @@ -2303,17 +2413,37 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } + r = DM_MAPIO_REMAPPED; switch (lookup_result.op) { case POLICY_HIT: - inc_hit_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + if (passthrough_mode(&cache->features)) { + if (bio_data_dir(bio) == WRITE) { + /* + * We need to invalidate this block, so + * defer for the worker thread. + */ + cell_defer(cache, cell, true); + r = DM_MAPIO_SUBMITTED; - if (is_writethrough_io(cache, bio, lookup_result.cblock)) - remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - else - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + } else { + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + inc_miss_counter(cache, bio); + remap_to_origin_clear_discard(cache, bio, block); - cell_defer(cache, cell, false); + cell_defer(cache, cell, false); + } + + } else { + inc_hit_counter(cache, bio); + + if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && + !is_dirty(cache, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + else + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + + cell_defer(cache, cell, false); + } break; case POLICY_MISS: @@ -2338,10 +2468,10 @@ static int cache_map(struct dm_target *ti, struct bio *bio) DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, (unsigned) lookup_result.op); bio_io_error(bio); - return DM_MAPIO_SUBMITTED; + r = DM_MAPIO_SUBMITTED; } - return DM_MAPIO_REMAPPED; + return r; } static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) @@ -2659,10 +2789,19 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned long long) from_cblock(residency), cache->nr_dirty); - if (cache->features.write_through) + if (writethrough_mode(&cache->features)) DMEMIT("1 writethrough "); - else - DMEMIT("0 "); + + else if (passthrough_mode(&cache->features)) + DMEMIT("1 passthrough "); + + else if (writeback_mode(&cache->features)) + DMEMIT("1 writeback "); + + else { + DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); + goto err; + } DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); if (sz < maxlen) { @@ -2771,7 +2910,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 1, 1}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr,