From 593060d756e0c2382d59cf00d5f3b9e3a336c408 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 25 Mar 2008 16:50:33 -0400 Subject: [PATCH] Btrfs: Implement raid0 when multiple devices are present Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 2 + fs/btrfs/extent-tree.c | 102 +++++++++++++++++++++----------- fs/btrfs/inode.c | 2 - fs/btrfs/volumes.c | 130 +++++++++++++++++++++++++++++++---------- 5 files changed, 173 insertions(+), 66 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7556f8319c60..f43dfe7ffe6d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -413,6 +413,7 @@ struct btrfs_csum_item { #define BTRFS_BLOCK_GROUP_DATA (1 << 0) #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) +#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) struct btrfs_block_group_item { @@ -498,6 +499,8 @@ struct btrfs_fs_info { u64 delalloc_bytes; u64 last_alloc; u64 last_data_alloc; + int extra_data_alloc_bits; + int extra_alloc_bits; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f971a29e4f20..d8474796dea2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->total_pinned = 0; fs_info->last_alloc = 0; fs_info->last_data_alloc = 0; + fs_info->extra_alloc_bits = 0; + fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e9ef644ff56f..c226656f29b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,7 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { - return (cache->flags & bits); + return (cache->flags & bits) == bits; } static int noinline find_search_start(struct btrfs_root *root, @@ -1010,6 +1010,35 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, } +static int update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + + found = __find_space_info(info, flags); + if (found) { + found->total_bytes += total_bytes; + found->bytes_used += bytes_used; + WARN_ON(found->total_bytes < found->bytes_used); + *space_info = found; + return 0; + } + found = kmalloc(sizeof(*found), GFP_NOFS); + if (!found) + return -ENOMEM; + + list_add(&found->list, &info->space_info); + found->flags = flags; + found->total_bytes = total_bytes; + found->bytes_used = bytes_used; + found->bytes_pinned = 0; + found->full = 0; + *space_info = found; + return 0; +} + + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1021,6 +1050,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, int ret; space_info = __find_space_info(extent_root->fs_info, flags); + if (!space_info) { + ret = update_space_info(extent_root->fs_info, flags, + 0, 0, &space_info); + BUG_ON(ret); + } BUG_ON(!space_info); if (space_info->full) @@ -1044,6 +1078,17 @@ printk("space info full %Lu\n", flags); extent_root->fs_info->chunk_root->root_key.objectid, start, num_bytes); BUG_ON(ret); + + if (flags & BTRFS_BLOCK_GROUP_RAID0) { + if (flags & BTRFS_BLOCK_GROUP_DATA) { + extent_root->fs_info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (flags & BTRFS_BLOCK_GROUP_METADATA) { + extent_root->fs_info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } return 0; } @@ -1655,24 +1700,31 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_extent_ref *ref; struct btrfs_path *path; struct btrfs_key keys[2]; + int extra_chunk_alloc_bits = 0; if (data) { - data = BTRFS_BLOCK_GROUP_DATA; + data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits; } else if (root == root->fs_info->chunk_root) { data = BTRFS_BLOCK_GROUP_SYSTEM; } else { - data = BTRFS_BLOCK_GROUP_METADATA; + data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits; } + if (btrfs_super_num_devices(&info->super_copy) > 1 && + !(data & BTRFS_BLOCK_GROUP_SYSTEM)) + extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0; if (root->ref_cows) { - if (data != BTRFS_BLOCK_GROUP_METADATA) { + if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, - BTRFS_BLOCK_GROUP_METADATA); + BTRFS_BLOCK_GROUP_METADATA | + info->extra_alloc_bits | + extra_chunk_alloc_bits); BUG_ON(ret); } ret = do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes + 2 * 1024 * 1024, data); + num_bytes + 2 * 1024 * 1024, data | + extra_chunk_alloc_bits); BUG_ON(ret); } @@ -2627,34 +2679,6 @@ error: return ret; } -static int update_space_info(struct btrfs_fs_info *info, u64 flags, - u64 total_bytes, u64 bytes_used, - struct btrfs_space_info **space_info) -{ - struct btrfs_space_info *found; - - found = __find_space_info(info, flags); - if (found) { - found->total_bytes += total_bytes; - found->bytes_used += bytes_used; - WARN_ON(found->total_bytes < found->bytes_used); - *space_info = found; - return 0; - } - found = kmalloc(sizeof(*found), GFP_NOFS); - if (!found) - return -ENOMEM; - - list_add(&found->list, &info->space_info); - found->flags = flags; - found->total_bytes = total_bytes; - found->bytes_used = bytes_used; - found->bytes_pinned = 0; - found->full = 0; - *space_info = found; - return 0; -} - int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -2712,6 +2736,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { bit = BLOCK_GROUP_METADATA; } + if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) { + if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { + info->extra_data_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { + info->extra_alloc_bits = + BTRFS_BLOCK_GROUP_RAID0; + } + } ret = update_space_info(info, cache->flags, found_key.offset, btrfs_block_group_used(&cache->item), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5140d6801846..db60d85598ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -317,8 +317,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, map_length = length; ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); if (map_length < length + size) { - printk("merge bio hook logical %Lu bio len %Lu physical %Lu " - "len %Lu\n", logical, length, physical, map_length); return 1; } return 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 263f01cc3db4..d8fce32a3bbc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -25,10 +26,24 @@ #include "print-tree.h" #include "volumes.h" -struct map_lookup { +struct stripe { struct btrfs_device *dev; u64 physical; }; + +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + struct stripe stripes[]; +}; + +#define map_lookup_size(n) (sizeof(struct map_lookup) + \ + (sizeof(struct stripe) * (n))) + static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 *num_bytes, u64 type) { u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; @@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int looped = 0; int ret; int index; + int stripe_len = 64 * 1024; struct btrfs_key key; if (list_empty(dev_list)) return -ENOSPC; + + if (type & BTRFS_BLOCK_GROUP_RAID0) + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & BTRFS_BLOCK_GROUP_DATA) + stripe_len = 64 * 1024; + if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) + stripe_len = 32 * 1024; again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -650,9 +674,15 @@ again: if (!chunk) return -ENOMEM; + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + stripes = &chunk->stripe; - *num_bytes = calc_size; + *num_bytes = calc_size * num_stripes; index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ret = btrfs_update_device(trans, device); BUG_ON(ret); + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; btrfs_set_stack_stripe_devid(stripes + index, device->devid); btrfs_set_stack_stripe_offset(stripes + index, dev_offset); physical = dev_offset; @@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); key.offset = *num_bytes; key.type = BTRFS_CHUNK_ITEM_KEY; btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); - btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); - btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); - btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); - if (!map) { - free_extent_map(em); - return -ENOMEM; - } - em->bdev = (struct block_device *)map; em->start = key.objectid; em->len = key.offset; em->block_start = 0; - map->physical = physical; - map->dev = device; - - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; - } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, struct map_lookup *map; struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; + u64 stripe_offset; + u64 stripe_nr; + int stripe_index; spin_lock(&em_tree->lock); @@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; offset = logical - em->start; - *phys = map->physical + offset; - *length = em->len - offset; - *dev = map->dev; + + stripe_nr = offset; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + do_div(stripe_nr, map->stripe_len); + + stripe_offset = stripe_nr * map->stripe_len; + BUG_ON(offset < stripe_offset); + + /* stripe_offset is the offset of this block in its stripe*/ + stripe_offset = offset - stripe_offset; + + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + + BUG_ON(stripe_index >= map->num_stripes); + + *phys = map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, em->len - offset, + map->stripe_len - stripe_offset); + } else { + *length = em->len - offset; + } + *dev = map->stripes[stripe_index].dev; free_extent_map(em); spin_unlock(&em_tree->lock); return 0; @@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, u64 logical; u64 length; u64 devid; + int num_stripes; int ret; + int i; logical = key->objectid; length = key->offset; @@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { free_extent_map(em); return -ENOMEM; @@ -857,13 +918,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em->len = length; em->block_start = 0; - map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); - devid = btrfs_stripe_devid_nr(leaf, chunk, 0); - map->dev = btrfs_find_device(root, devid); - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; + map->num_stripes = num_stripes; + map->io_width = btrfs_chunk_io_width(leaf, chunk); + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->sector_size = btrfs_chunk_sector_size(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = btrfs_chunk_type(leaf, chunk); + for (i = 0; i < num_stripes; i++) { + map->stripes[i].physical = + btrfs_stripe_offset_nr(leaf, chunk, i); + devid = btrfs_stripe_devid_nr(leaf, chunk, i); + map->stripes[i].dev = btrfs_find_device(root, devid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } } spin_lock(&map_tree->map_tree.lock);