diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0cbf3491bb7c..42b03c4ee494 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -691,6 +691,7 @@ struct btrfs_space_info {
 	struct list_head block_groups;
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
+	atomic_t caching_threads;
 };
 
 /*
@@ -721,11 +722,17 @@ struct btrfs_free_cluster {
 	struct list_head block_group_list;
 };
 
+enum btrfs_caching_type {
+	BTRFS_CACHE_NO		= 0,
+	BTRFS_CACHE_STARTED	= 1,
+	BTRFS_CACHE_FINISHED	= 2,
+};
+
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
+	struct btrfs_fs_info *fs_info;
 	spinlock_t lock;
-	struct mutex cache_mutex;
 	u64 pinned;
 	u64 reserved;
 	u64 flags;
@@ -733,15 +740,19 @@ struct btrfs_block_group_cache {
 	int extents_thresh;
 	int free_extents;
 	int total_bitmaps;
-	int cached;
 	int ro;
 	int dirty;
 
+	/* cache tracking stuff */
+	wait_queue_head_t caching_q;
+	int cached;
+
 	struct btrfs_space_info *space_info;
 
 	/* free space cache stuff */
 	spinlock_t tree_lock;
 	struct rb_root free_space_offset;
+	u64 free_space;
 
 	/* block group cache stuff */
 	struct rb_node cache_node;
@@ -834,6 +845,7 @@ struct btrfs_fs_info {
 	atomic_t async_submit_draining;
 	atomic_t nr_async_bios;
 	atomic_t async_delalloc_pages;
+	atomic_t async_caching_threads;
 
 	/*
 	 * this is used by the balancing code to wait for all the pending
@@ -950,6 +962,9 @@ struct btrfs_root {
 	/* the node lock is held while changing the node pointer */
 	spinlock_t node_lock;
 
+	/* taken when updating the commit root */
+	struct rw_semaphore commit_root_sem;
+
 	struct extent_buffer *commit_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
@@ -1911,7 +1926,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_update_pinned_extents(struct btrfs_root *root,
-				u64 bytenr, u64 num, int pin);
+				u64 bytenr, u64 num, int pin, int mark_free);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1996,6 +2011,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 				  u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			       u64 bytes);
+void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 55d9d188e693..ec2c915f7f4a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -907,6 +907,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	spin_lock_init(&root->inode_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
+	init_rwsem(&root->commit_root_sem);
 	init_waitqueue_head(&root->log_writer_wait);
 	init_waitqueue_head(&root->log_commit_wait[0]);
 	init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1566,6 +1567,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
+	atomic_set(&fs_info->async_caching_threads, 0);
 	fs_info->sb = sb;
 	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
@@ -2337,6 +2339,7 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(root->fs_info->csum_root->commit_root);
 
 	btrfs_free_block_groups(root->fs_info);
+	btrfs_free_super_mirror_extents(root->fs_info);
 
 	del_fs_roots(fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 98697be6bdde..9a489cc89fd3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
+#include <linux/kthread.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force);
 
+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+	smp_mb();
+	return cache->cached == BTRFS_CACHE_FINISHED;
+}
+
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
 	return (cache->flags & bits) == bits;
@@ -145,21 +153,64 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 	return ret;
 }
 
+void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info)
+{
+	u64 start, end, last = 0;
+	int ret;
+
+	while (1) {
+		ret = find_first_extent_bit(&info->pinned_extents, last,
+					    &start, &end, EXTENT_LOCKED);
+		if (ret)
+			break;
+
+		unlock_extent(&info->pinned_extents, start, end, GFP_NOFS);
+		last = end+1;
+	}
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+				struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u64 bytenr;
+	u64 *logical;
+	int stripe_len;
+	int i, nr, ret;
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+				       cache->key.objectid, bytenr,
+				       0, &logical, &nr, &stripe_len);
+		BUG_ON(ret);
+		while (nr--) {
+			try_lock_extent(&fs_info->pinned_extents,
+					logical[nr],
+					logical[nr] + stripe_len - 1, GFP_NOFS);
+		}
+		kfree(logical);
+	}
+
+	return 0;
+}
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
  * since their free space will be released as soon as the transaction commits.
 */
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 			      struct btrfs_fs_info *info, u64 start, u64 end)
 {
-	u64 extent_start, extent_end, size;
+	u64 extent_start, extent_end, size, total_added = 0;
 	int ret;
 
 	while (start < end) {
 		ret = find_first_extent_bit(&info->pinned_extents, start,
 					    &extent_start, &extent_end,
-					    EXTENT_DIRTY);
+					    EXTENT_DIRTY|EXTENT_LOCKED|
+					    EXTENT_DELALLOC);
 		if (ret)
 			break;
 
@@ -167,6 +218,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 			start = extent_end + 1;
 		} else if (extent_start > start && extent_start < end) {
 			size = extent_start - start;
+			total_added += size;
 			ret = btrfs_add_free_space(block_group, start, size);
 			BUG_ON(ret);
@@ -178,84 +230,139 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 
 	if (start < end) {
 		size = end - start;
+		total_added += size;
 		ret = btrfs_add_free_space(block_group, start, size);
 		BUG_ON(ret);
 	}
 
-	return 0;
+	return total_added;
 }
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-				struct btrfs_block_group_cache *cache)
-{
-	u64 bytenr;
-	u64 *logical;
-	int stripe_len;
-	int i, nr, ret;
+DEFINE_MUTEX(discard_mutex);
 
-	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-		bytenr = btrfs_sb_offset(i);
-		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
-				       cache->key.objectid, bytenr, 0,
-				       &logical, &nr, &stripe_len);
-		BUG_ON(ret);
-		while (nr--) {
-			btrfs_remove_free_space(cache, logical[nr],
-						stripe_len);
+/*
+ * if async kthreads are running when we cross transactions, we mark any pinned
+ * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those
+ * extents when they are done.  Also we run this from btrfs_finish_extent_commit
+ * in case there were some pinned extents that were missed because we had
+ * already cached that block group.
+ */
+static void btrfs_discard_pinned_extents(struct btrfs_fs_info *fs_info,
+					 struct btrfs_block_group_cache *cache)
+{
+	u64 start, end, last;
+	int ret;
+
+	if (!cache)
+		last = 0;
+	else
+		last = cache->key.objectid;
+
+	mutex_lock(&discard_mutex);
+	while (1) {
+		ret = find_first_extent_bit(&fs_info->pinned_extents, last,
+					    &start, &end, EXTENT_DELALLOC);
+		if (ret)
+			break;
+
+		if (cache && start >= cache->key.objectid + cache->key.offset)
+			break;
+
+
+		if (!cache) {
+			cache = btrfs_lookup_block_group(fs_info, start);
+			BUG_ON(!cache);
+
+			start = max(start, cache->key.objectid);
+			end = min(end, cache->key.objectid + cache->key.offset - 1);
+
+			if (block_group_cache_done(cache))
+				btrfs_add_free_space(cache, start,
+						     end - start + 1);
+			cache = NULL;
+		} else {
+			start = max(start, cache->key.objectid);
+			end = min(end, cache->key.objectid + cache->key.offset - 1);
+			btrfs_add_free_space(cache, start, end - start + 1);
+		}
+
+		clear_extent_bits(&fs_info->pinned_extents, start, end,
+				  EXTENT_DELALLOC, GFP_NOFS);
+		last = end + 1;
+
+		if (need_resched()) {
+			mutex_unlock(&discard_mutex);
+			cond_resched();
+			mutex_lock(&discard_mutex);
 		}
-		kfree(logical);
 	}
 
-	return 0;
+	mutex_unlock(&discard_mutex);
 }
 
-static int cache_block_group(struct btrfs_root *root,
-			     struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
 {
+	struct btrfs_block_group_cache *block_group = data;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	u64 last = 0;
 	struct btrfs_path *path;
 	int ret = 0;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
 	int slot;
-	u64 last;
+	u64 total_found = 0;
 
-	if (!block_group)
-		return 0;
-
-	root = root->fs_info->extent_root;
-
-	if (block_group->cached)
-		return 0;
+	BUG_ON(!fs_info);
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-	path->reada = 2;
+	atomic_inc(&fs_info->async_caching_threads);
+	atomic_inc(&block_group->space_info->caching_threads);
+	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+again:
+	/* need to make sure the commit_root doesn't disappear */
+	down_read(&fs_info->extent_root->commit_root_sem);
+
 	/*
-	 * we get into deadlocks with paths held by callers of this function.
-	 * since the alloc_mutex is protecting things right now, just
-	 * skip the locking here
+	 * We don't want to deadlock with somebody trying to allocate a new
+	 * extent for the extent root while also trying to search the extent
+	 * root to add free space.  So we skip locking and search the commit
+	 * root, since its read-only
 	 */
 	path->skip_locking = 1;
-	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+	path->search_commit_root = 1;
+	path->reada = 2;
+
 	key.objectid = last;
 	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
 
 	while (1) {
+		smp_mb();
+		if (block_group->fs_info->closing)
+			break;
+
 		leaf = path->nodes[0];
 		slot = path->slots[0];
 		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, path);
+			ret = btrfs_next_leaf(fs_info->extent_root, path);
 			if (ret < 0)
 				goto err;
-			if (ret == 0)
-				continue;
-			else
+			else if (ret)
 				break;
+
+			if (need_resched()) {
+				btrfs_release_path(fs_info->extent_root, path);
+				up_read(&fs_info->extent_root->commit_root_sem);
+				cond_resched();
+				goto again;
+			}
+
+			continue;
 		}
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.objectid < block_group->key.objectid)
@@ -266,24 +373,63 @@ static int cache_block_group(struct btrfs_root *root,
 			break;
 
 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
-			add_new_free_space(block_group, root->fs_info, last,
-					   key.objectid);
-
+			total_found += add_new_free_space(block_group,
+							  fs_info, last,
+							  key.objectid);
 			last = key.objectid + key.offset;
 		}
+
+		if (total_found > (1024 * 1024 * 2)) {
+			total_found = 0;
+			wake_up(&block_group->caching_q);
+		}
 next:
 		path->slots[0]++;
 	}
-
-	add_new_free_space(block_group, root->fs_info, last,
-			   block_group->key.objectid +
-			   block_group->key.offset);
-
-	block_group->cached = 1;
-	remove_sb_from_cache(root, block_group);
 	ret = 0;
+
+	total_found += add_new_free_space(block_group, fs_info, last,
+					  block_group->key.objectid +
+					  block_group->key.offset);
+
+	spin_lock(&block_group->lock);
+	block_group->cached = BTRFS_CACHE_FINISHED;
+	spin_unlock(&block_group->lock);
+
 err:
 	btrfs_free_path(path);
+	up_read(&fs_info->extent_root->commit_root_sem);
+	atomic_dec(&fs_info->async_caching_threads);
+	atomic_dec(&block_group->space_info->caching_threads);
+	wake_up(&block_group->caching_q);
+
+	if (!ret)
+		btrfs_discard_pinned_extents(fs_info, block_group);
+
+	return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+	struct task_struct *tsk;
+	int ret = 0;
+
+	spin_lock(&cache->lock);
+	if (cache->cached != BTRFS_CACHE_NO) {
+		spin_unlock(&cache->lock);
+		return ret;
+	}
+	cache->cached = BTRFS_CACHE_STARTED;
+	spin_unlock(&cache->lock);
+
+	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+			  cache->key.objectid);
+	if (IS_ERR(tsk)) {
+		ret = PTR_ERR(tsk);
+		printk(KERN_ERR "error running thread %d\n", ret);
+		BUG();
+	}
+	return ret;
 }
 
@@ -1721,7 +1867,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 			BUG_ON(ret);
 		}
 		btrfs_update_pinned_extents(root, node->bytenr,
-					    node->num_bytes, 1);
+					    node->num_bytes, 1, 0);
 		update_reserved_extents(root, node->bytenr,
 					node->num_bytes, 0);
 	}
@@ -2496,6 +2642,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->force_alloc = 0;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
+	atomic_set(&found->caching_threads, 0);
 
 	return 0;
 }
@@ -2953,7 +3100,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 }
 
 int btrfs_update_pinned_extents(struct btrfs_root *root,
-				u64 bytenr, u64 num, int pin)
+				u64 bytenr, u64 num, int pin, int mark_free)
 {
 	u64 len;
 	struct btrfs_block_group_cache *cache;
@@ -2988,7 +3135,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
 			fs_info->total_pinned -= len;
-			if (cache->cached)
+			if (block_group_cache_done(cache) && mark_free)
 				btrfs_add_free_space(cache, bytenr, len);
 		}
 		btrfs_put_block_group(cache);
@@ -3034,14 +3181,27 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 	u64 last = 0;
 	u64 start;
 	u64 end;
+	bool caching_kthreads = false;
 	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
 	int ret;
 
+	if (atomic_read(&root->fs_info->async_caching_threads))
+		caching_kthreads = true;
+
 	while (1) {
 		ret = find_first_extent_bit(pinned_extents, last,
 					    &start, &end, EXTENT_DIRTY);
 		if (ret)
 			break;
+
+		/*
+		 * we need to make sure that the pinned extents don't go away
+		 * while we are caching block groups
+		 */
+		if (unlikely(caching_kthreads))
+			set_extent_delalloc(pinned_extents, start, end,
+					    GFP_NOFS);
+
 		set_extent_dirty(copy, start, end, GFP_NOFS);
 		last = end + 1;
 	}
@@ -3055,6 +3215,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	u64 start;
 	u64 end;
 	int ret;
+	int mark_free = 1;
+
+	ret = find_first_extent_bit(&root->fs_info->pinned_extents, 0,
+				    &start, &end, EXTENT_DELALLOC);
+	if (!ret)
+		mark_free = 0;
 
 	while (1) {
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
@@ -3065,11 +3231,16 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 		ret = btrfs_discard_extent(root, start, end + 1 - start);
 
 		/* unlocks the pinned mutex */
-		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
+		btrfs_update_pinned_extents(root, start, end + 1 - start, 0,
+					    mark_free);
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 
 		cond_resched();
 	}
+
+	if (unlikely(!mark_free))
+		btrfs_discard_pinned_extents(root->fs_info, NULL);
+
 	return ret;
 }
 
@@ -3110,7 +3281,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
 pinit:
 	btrfs_set_path_blocking(path);
 	/* unlocks the pinned mutex */
-	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
 
 	BUG_ON(err < 0);
 	return 0;
@@ -3421,7 +3592,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
 	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
 		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
 		/* unlocks the pinned mutex */
-		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
 		update_reserved_extents(root, bytenr, num_bytes, 0);
 		ret = 0;
 	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -3447,6 +3618,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
 	return ret;
 }
 
+/*
+ * when we wait for progress in the block group caching, its because
+ * our allocation attempt failed at least once.  So, we must sleep
+ * and let some progress happen before we try again.
+ *
+ * This function will sleep at least once waiting for new free space to
+ * show up, and then it will check the block group free space numbers
+ * for our min num_bytes.  Another option is to have it go ahead
+ * and look in the rbtree for a free extent of a given size, but this
+ * is a good start.
+ */
+static noinline int
+wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+				u64 num_bytes)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
+
+	if (block_group_cache_done(cache)) {
+		finish_wait(&cache->caching_q, &wait);
+		return 0;
+	}
+	schedule();
+	finish_wait(&cache->caching_q, &wait);
+
+	wait_event(cache->caching_q, block_group_cache_done(cache) ||
+		   (cache->free_space >= num_bytes));
+	return 0;
+}
+
+enum btrfs_loop_type {
+	LOOP_CACHED_ONLY = 0,
+	LOOP_CACHING_NOWAIT = 1,
+	LOOP_CACHING_WAIT = 2,
+	LOOP_ALLOC_CHUNK = 3,
+	LOOP_NO_EMPTY_SIZE = 4,
+};
+
 /*
  * walks the btree of allocated extents and find a hole of a given size.
  * The key ins is changed to record the hole:
@@ -3472,6 +3682,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
+	bool found_uncached_bg = false;
 
 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3503,15 +3714,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, hint_byte);
 
-	if (!last_ptr) {
+	if (!last_ptr)
 		empty_cluster = 0;
-		loop = 1;
-	}
 
 	if (search_start == hint_byte) {
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
-		if (block_group && block_group_bits(block_group, data)) {
+		/*
+		 * we don't want to use the block group if it doesn't match our
+		 * allocation bits, or if its not cached.
+		 */
+		if (block_group && block_group_bits(block_group, data) &&
+		    block_group_cache_done(block_group)) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
@@ -3534,21 +3748,35 @@ search:
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
 		u64 offset;
+		int cached;
 
 		atomic_inc(&block_group->count);
 		search_start = block_group->key.objectid;
 
have_block_group:
-		if (unlikely(!block_group->cached)) {
-			mutex_lock(&block_group->cache_mutex);
-			ret = cache_block_group(root, block_group);
-			mutex_unlock(&block_group->cache_mutex);
-			if (ret) {
-				btrfs_put_block_group(block_group);
-				break;
+		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			/*
+			 * we want to start caching kthreads, but not too many
+			 * right off the bat so we don't overwhelm the system,
+			 * so only start them if there are less than 2 and we're
+			 * in the initial allocation phase.
+			 */
+			if (loop > LOOP_CACHING_NOWAIT ||
+			    atomic_read(&space_info->caching_threads) < 2) {
+				ret = cache_block_group(block_group);
+				BUG_ON(ret);
 			}
 		}
 
+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached)) {
+			found_uncached_bg = true;
+
+			/* if we only want cached bgs, loop */
+			if (loop == LOOP_CACHED_ONLY)
+				goto loop;
+		}
+
 		if (unlikely(block_group->ro))
 			goto loop;
@@ -3627,14 +3855,21 @@ refill_cluster:
 				spin_unlock(&last_ptr->refill_lock);
 				goto checks;
 			}
+		} else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+			spin_unlock(&last_ptr->refill_lock);
+
+			wait_block_group_cache_progress(block_group,
+			       num_bytes + empty_cluster + empty_size);
+			goto have_block_group;
 		}
+
 		/*
 		 * at this point we either didn't find a cluster
 		 * or we weren't able to allocate a block from our
 		 * cluster.  Free the cluster we've been trying
 		 * to use, and go to the next block group
 		 */
-		if (loop < 2) {
+		if (loop < LOOP_NO_EMPTY_SIZE) {
 			btrfs_return_cluster_to_free_space(NULL,
 							   last_ptr);
 			spin_unlock(&last_ptr->refill_lock);
@@ -3645,8 +3880,15 @@ refill_cluster:
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset)
+		if (!offset && (cached || (!cached &&
+		    loop == LOOP_CACHING_NOWAIT))) {
 			goto loop;
+		} else if (!offset && (!cached &&
+			   loop > LOOP_CACHING_NOWAIT)) {
+			wait_block_group_cache_progress(block_group,
+					num_bytes + empty_size);
+			goto have_block_group;
+		}
checks:
 		search_start = stripe_align(root, offset);
 		/* move on to the next group */
@@ -3694,13 +3936,26 @@ loop:
 	}
 	up_read(&space_info->groups_sem);
 
-	/* loop == 0, try to find a clustered alloc in every block group
-	 * loop == 1, try again after forcing a chunk allocation
-	 * loop == 2, set empty_size and empty_cluster to 0 and try again
+	/* LOOP_CACHED_ONLY, only search fully cached block groups
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+	 *			don't wait for them to finish caching
+	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
+	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
+	 *			again
 	 */
-	if (!ins->objectid && loop < 3 &&
-	    (empty_size || empty_cluster || allowed_chunk_alloc)) {
-		if (loop >= 2) {
+	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
+	    (found_uncached_bg || empty_size || empty_cluster ||
+	     allowed_chunk_alloc)) {
+		if (found_uncached_bg) {
+			found_uncached_bg = false;
+			if (loop < LOOP_CACHING_WAIT) {
+				loop++;
+				goto search;
+			}
+		}
+
+		if (loop == LOOP_ALLOC_CHUNK) {
 			empty_size = 0;
 			empty_cluster = 0;
 		}
@@ -3713,7 +3968,7 @@ loop:
 			space_info->force_alloc = 1;
 		}
 
-	if (loop < 3) {
+	if (loop < LOOP_NO_EMPTY_SIZE) {
 		loop++;
 		goto search;
 	}
@@ -3809,7 +4064,7 @@ again:
 			       num_bytes, data, 1);
 		goto again;
 	}
-	if (ret) {
+	if (ret == -ENOSPC) {
 		struct btrfs_space_info *sinfo;
 
 		sinfo = __find_space_info(root->fs_info, data);
@@ -3817,7 +4072,6 @@ again:
 		       "wanted %llu\n", (unsigned long long)data,
 		       (unsigned long long)num_bytes);
 		dump_space_info(sinfo, num_bytes);
-		BUG();
 	}
 
 	return ret;
@@ -3855,7 +4109,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 				     empty_size, hint_byte, search_end, ins,
 				     data);
-	update_reserved_extents(root, ins->objectid, ins->offset, 1);
+	if (!ret)
+		update_reserved_extents(root, ins->objectid, ins->offset, 1);
+
 	return ret;
 }
@@ -4017,9 +4273,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	mutex_lock(&block_group->cache_mutex);
-	cache_block_group(root, block_group);
-	mutex_unlock(&block_group->cache_mutex);
+	cache_block_group(block_group);
+	wait_event(block_group->caching_q,
+		   block_group_cache_done(block_group));
 
 	ret = btrfs_remove_free_space(block_group, ins->objectid,
 				      ins->offset);
@@ -4050,7 +4306,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
 				     empty_size, hint_byte, search_end,
 				     ins, 0);
-	BUG_ON(ret);
+	if (ret)
+		return ret;
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
@@ -6966,11 +7223,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 			 &info->block_group_cache_tree);
 		spin_unlock(&info->block_group_cache_lock);
 
-		btrfs_remove_free_space_cache(block_group);
 		down_write(&block_group->space_info->groups_sem);
 		list_del(&block_group->list);
 		up_write(&block_group->space_info->groups_sem);
 
+		if (block_group->cached == BTRFS_CACHE_STARTED)
+			wait_event(block_group->caching_q,
+				   block_group_cache_done(block_group));
+
+		btrfs_remove_free_space_cache(block_group);
+
 		WARN_ON(atomic_read(&block_group->count) != 1);
 		kfree(block_group);
@@ -7036,10 +7298,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		atomic_set(&cache->count, 1);
 		spin_lock_init(&cache->lock);
 		spin_lock_init(&cache->tree_lock);
-		mutex_init(&cache->cache_mutex);
+		cache->fs_info = info;
+		init_waitqueue_head(&cache->caching_q);
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
-		cache->sectorsize = root->sectorsize;
 
 		/*
 		 * we only want to have 32k of ram per block group for keeping
@@ -7057,6 +7319,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		key.objectid = found_key.objectid + found_key.offset;
 		btrfs_release_path(root, path);
 		cache->flags = btrfs_block_group_flags(&cache->item);
+		cache->sectorsize = root->sectorsize;
+
+		remove_sb_from_cache(root, cache);
+
+		/*
+		 * check for two cases, either we are full, and therefore
+		 * don't need to bother with the caching work since we won't
+		 * find any space, or we are empty, and we can just add all
+		 * the space in and be done with it.  This saves us _alot_ of
+		 * time, particularly in the full case.
+		 */
+		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+		} else if (btrfs_block_group_used(&cache->item) == 0) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			add_new_free_space(cache, root->fs_info,
+					   found_key.objectid,
+					   found_key.objectid +
+					   found_key.offset);
+		}
 
 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),
@@ -7112,7 +7394,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	atomic_set(&cache->count, 1);
 	spin_lock_init(&cache->lock);
 	spin_lock_init(&cache->tree_lock);
-	mutex_init(&cache->cache_mutex);
+	init_waitqueue_head(&cache->caching_q);
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->cluster_list);
@@ -7121,11 +7403,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->flags = type;
 	btrfs_set_block_group_flags(&cache->item, type);
 
-	cache->cached = 1;
-	ret = btrfs_add_free_space(cache, chunk_offset, size);
-	BUG_ON(ret);
+	cache->cached = BTRFS_CACHE_FINISHED;
 	remove_sb_from_cache(root, cache);
 
+	add_new_free_space(cache, root->fs_info, chunk_offset,
+			   chunk_offset + size);
+
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 				&cache->space_info);
 	BUG_ON(ret);
@@ -7184,7 +7467,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
 	spin_unlock(&root->fs_info->block_group_cache_lock);
-	btrfs_remove_free_space_cache(block_group);
+
 	down_write(&block_group->space_info->groups_sem);
 	/*
 	 * we must use list_del_init so people can check to see if they
@@ -7193,6 +7476,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	list_del_init(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);
 
+	if (block_group->cached == BTRFS_CACHE_STARTED)
+		wait_event(block_group->caching_q,
+			   block_group_cache_done(block_group));
+
+	btrfs_remove_free_space_cache(block_group);
+
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ab8cad8b46c9..af99b78b288e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -238,6 +238,7 @@ static void unlink_free_space(struct btrfs_block_group_cache *block_group,
 {
 	rb_erase(&info->offset_index, &block_group->free_space_offset);
 	block_group->free_extents--;
+	block_group->free_space -= info->bytes;
 }
 
 static int link_free_space(struct btrfs_block_group_cache *block_group,
@@ -251,6 +252,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
 	if (ret)
 		return ret;
 
+	block_group->free_space += info->bytes;
 	block_group->free_extents++;
 	return ret;
 }
@@ -285,36 +287,40 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
 	}
 }
 
-static void bitmap_clear_bits(struct btrfs_free_space *info, u64 offset, u64 bytes,
-			      u64 sectorsize)
+static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
+			      struct btrfs_free_space *info, u64 offset,
+			      u64 bytes)
 {
 	unsigned long start, end;
 	unsigned long i;
 
-	start = offset_to_bit(info->offset, sectorsize, offset);
-	end = start + bytes_to_bits(bytes, sectorsize);
+	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
+	end = start + bytes_to_bits(bytes, block_group->sectorsize);
 	BUG_ON(end > BITS_PER_BITMAP);
 
 	for (i = start; i < end; i++)
 		clear_bit(i, info->bitmap);
 
 	info->bytes -= bytes;
+	block_group->free_space -= bytes;
 }
 
-static void bitmap_set_bits(struct btrfs_free_space *info, u64 offset, u64 bytes,
-			    u64 sectorsize)
+static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
+			    struct btrfs_free_space *info, u64 offset,
+			    u64 bytes)
 {
 	unsigned long start, end;
 	unsigned long i;
 
-	start = offset_to_bit(info->offset, sectorsize, offset);
-	end = start + bytes_to_bits(bytes, sectorsize);
+	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
+	end = start + bytes_to_bits(bytes, block_group->sectorsize);
 	BUG_ON(end > BITS_PER_BITMAP);
 
 	for (i = start; i < end; i++)
 		set_bit(i, info->bitmap);
 
 	info->bytes += bytes;
+	block_group->free_space += bytes;
 }
 
 static int search_bitmap(struct btrfs_block_group_cache *block_group,
@@ -414,13 +420,12 @@ again:
 	      (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
 
 	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-		bitmap_clear_bits(bitmap_info, *offset,
-				  end - *offset + 1, block_group->sectorsize);
+		bitmap_clear_bits(block_group, bitmap_info, *offset,
+				  end - *offset + 1);
 		*bytes -= end - *offset + 1;
 		*offset = end + 1;
 	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-		bitmap_clear_bits(bitmap_info, *offset,
-				  *bytes, block_group->sectorsize);
+		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
 		*bytes = 0;
 	}
 
@@ -495,14 +500,13 @@ again:
 	      (u64)(BITS_PER_BITMAP * block_group->sectorsize);
 
 	if (offset >= bitmap_info->offset && offset + bytes > end) {
-		bitmap_set_bits(bitmap_info, offset, end - offset,
-				block_group->sectorsize);
+		bitmap_set_bits(block_group, bitmap_info, offset,
+				end - offset);
 		bytes -= end - offset;
 		offset = end;
 		added = 0;
 	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
-		bitmap_set_bits(bitmap_info, offset, bytes,
-				block_group->sectorsize);
+		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
 		bytes = 0;
 	} else {
 		BUG();
 	}
 
@@ -870,8 +874,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 	ret = offset;
 	if (entry->bitmap) {
-		bitmap_clear_bits(entry, offset, bytes,
-				  block_group->sectorsize);
+		bitmap_clear_bits(block_group, entry, offset, bytes);
 		if (!entry->bytes) {
 			unlink_free_space(block_group, entry);
 			kfree(entry->bitmap);
@@ -891,6 +894,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 
out:
 	spin_unlock(&block_group->tree_lock);
+
 	return ret;
 }
 
@@ -967,7 +971,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 		goto out;
 
 	ret = search_start;
-	bitmap_clear_bits(entry, ret, bytes, block_group->sectorsize);
+	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
 	spin_unlock(&cluster->lock);
 	spin_unlock(&block_group->tree_lock);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 81f7124c3051..32454d1c566f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
 	}
 }
 
+static noinline void switch_commit_root(struct btrfs_root *root)
+{
+	down_write(&root->commit_root_sem);
+	free_extent_buffer(root->commit_root);
+	root->commit_root = btrfs_root_node(root);
+	up_write(&root->commit_root_sem);
+}
+
 /*
  * either allocate a new transaction or hop into the existing one
  */
@@ -458,8 +466,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 		ret = btrfs_write_dirty_block_groups(trans, root);
 		BUG_ON(ret);
 	}
-	free_extent_buffer(root->commit_root);
-	root->commit_root = btrfs_root_node(root);
+	switch_commit_root(root);
 
 	return 0;
 }
@@ -537,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 		btrfs_update_reloc_root(trans, root);
 
 		if (root->commit_root != root->node) {
-			free_extent_buffer(root->commit_root);
-			root->commit_root = btrfs_root_node(root);
+			switch_commit_root(root);
 			btrfs_set_root_node(&root->root_item,
 					    root->node);
 		}
@@ -1002,15 +1008,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
 			    root->fs_info->tree_root->node);
-	free_extent_buffer(root->fs_info->tree_root->commit_root);
-	root->fs_info->tree_root->commit_root =
-			btrfs_root_node(root->fs_info->tree_root);
+	switch_commit_root(root->fs_info->tree_root);
 
 	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
 			    root->fs_info->chunk_root->node);
-	free_extent_buffer(root->fs_info->chunk_root->commit_root);
-	root->fs_info->chunk_root->commit_root =
-			btrfs_root_node(root->fs_info->chunk_root);
+	switch_commit_root(root->fs_info->chunk_root);
 
 	update_super_roots(root);
@@ -1050,6 +1052,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	cur_trans->commit_done = 1;
 
 	root->fs_info->last_trans_committed = cur_trans->transid;
+	wake_up(&cur_trans->commit_wait);
 
 	put_transaction(cur_trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c13922206d1b..195606862618 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -264,7 +264,7 @@ static int process_one_buffer(struct btrfs_root *log,
 {
 	if (wc->pin)
 		btrfs_update_pinned_extents(log->fs_info->extent_root,
-					    eb->start, eb->len, 1);
+					    eb->start, eb->len, 1, 0);
 
 	if (btrfs_buffer_uptodate(eb, gen)) {
 		if (wc->write)