diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 51ff3f8dbab9..089f6da09411 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1298,8 +1298,8 @@ struct btrfs_block_group_cache { */ struct list_head cluster_list; - /* For delayed block group creation */ - struct list_head new_bg_list; + /* For delayed block group creation or deletion of empty block groups */ + struct list_head bg_list; }; /* delayed seq elem */ @@ -1568,6 +1568,7 @@ struct btrfs_fs_info { int do_barriers; int closing; int log_root_recovering; + int open; u64 total_pinned; @@ -1717,6 +1718,9 @@ struct btrfs_fs_info { /* Used to reclaim the metadata space in the background. */ struct work_struct async_reclaim_work; + + spinlock_t unused_bgs_lock; + struct list_head unused_bgs; }; struct btrfs_subvolume_writers { @@ -3344,6 +3348,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 size); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); +void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 48794f951427..4780e6623c7b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1769,6 +1769,7 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); + btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2230,6 +2231,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); + spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); @@ -2239,6 +2241,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); + INIT_LIST_HEAD(&fs_info->unused_bgs); btrfs_mapping_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); @@ -2977,6 +2980,8 @@ retry_root_backup: fs_info->update_uuid_tree_gen = 1; } + fs_info->open = 1; + return 0; fail_qgroup: @@ -3688,6 +3693,7 @@ void close_ctree(struct btrfs_root *root) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + fs_info->open = 0; free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b30ddb49cfab..28a27d5f02d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5433,6 +5433,20 @@ static int update_block_group(struct btrfs_root *root, spin_unlock(&cache->space_info->lock); } else { old_val -= num_bytes; + + /* + * No longer have used bytes in this block group, queue + * it for deletion. + */ + if (old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } btrfs_set_block_group_used(&cache->item, old_val); cache->pinned += num_bytes; cache->space_info->bytes_pinned += num_bytes; @@ -8855,6 +8869,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) } up_write(&info->commit_root_sem); + spin_lock(&info->unused_bgs_lock); + while (!list_empty(&info->unused_bgs)) { + block_group = list_first_entry(&info->unused_bgs, + struct btrfs_block_group_cache, + bg_list); + list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, @@ -8989,7 +9013,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) init_rwsem(&cache->data_rwsem); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + INIT_LIST_HEAD(&cache->bg_list); btrfs_init_free_space_ctl(cache); return cache; @@ -9130,8 +9154,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) __link_block_group(space_info, cache); set_avail_alloc_bits(root->fs_info, cache->flags); - if (btrfs_chunk_readonly(root, cache->key.objectid)) + if (btrfs_chunk_readonly(root, cache->key.objectid)) { set_block_group_ro(cache, 1); + } else if (btrfs_block_group_used(&cache->item) == 0) { + spin_lock(&info->unused_bgs_lock); + /* Should always be true but just in case. */ + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } } list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { @@ -9172,10 +9206,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_key key; int ret = 0; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, - new_bg_list) { - list_del_init(&block_group->new_bg_list); - + list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + list_del_init(&block_group->bg_list); if (ret) continue; @@ -9261,7 +9293,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, __link_block_group(cache->space_info, cache); - list_add_tail(&cache->new_bg_list, &trans->new_bgs); + list_add_tail(&cache->bg_list, &trans->new_bgs); set_avail_alloc_bits(extent_root->fs_info, type); @@ -9430,6 +9462,101 @@ out: return ret; } +/* + * Process the unused_bgs list and remove any that don't have any allocated + * space inside of them. + */ +void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (!fs_info->open) + return; + + spin_lock(&fs_info->unused_bgs_lock); + while (!list_empty(&fs_info->unused_bgs)) { + u64 start, end; + + block_group = list_first_entry(&fs_info->unused_bgs, + struct btrfs_block_group_cache, + bg_list); + space_info = block_group->space_info; + list_del_init(&block_group->bg_list); + if (ret || btrfs_mixed_space_info(space_info)) { + btrfs_put_block_group(block_group); + continue; + } + spin_unlock(&fs_info->unused_bgs_lock); + + /* Don't want to race with allocators so take the groups_sem */ + down_write(&space_info->groups_sem); + spin_lock(&block_group->lock); + if (block_group->reserved || + btrfs_block_group_used(&block_group->item) || + block_group->ro) { + /* + * We want to bail if we made new allocations or have + * outstanding allocations in this block group. We do + * the ro check in case balance is currently acting on + * this block group. + */ + spin_unlock(&block_group->lock); + up_write(&space_info->groups_sem); + goto next; + } + spin_unlock(&block_group->lock); + + /* We don't want to force the issue, only flip if it's ok. */ + ret = set_block_group_ro(block_group, 0); + up_write(&space_info->groups_sem); + if (ret < 0) { + ret = 0; + goto next; + } + + /* + * Want to do this before we do anything else so we can recover + * properly if we fail to join the transaction. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_set_block_group_rw(root, block_group); + ret = PTR_ERR(trans); + goto next; + } + + /* + * We could have pending pinned extents for this block group, + * just delete them, we don't care about them anymore. + */ + start = block_group->key.objectid; + end = start + block_group->key.offset - 1; + clear_extent_bits(&fs_info->freed_extents[0], start, end, + EXTENT_DIRTY, GFP_NOFS); + clear_extent_bits(&fs_info->freed_extents[1], start, end, + EXTENT_DIRTY, GFP_NOFS); + + /* Reset pinned so btrfs_put_block_group doesn't complain */ + block_group->pinned = 0; + + /* + * Btrfs_remove_chunk will abort the transaction if things go + * horribly wrong. + */ + ret = btrfs_remove_chunk(trans, root, + block_group->key.objectid); + btrfs_end_transaction(trans, root); +next: + btrfs_put_block_group(block_group); + spin_lock(&fs_info->unused_bgs_lock); + } + spin_unlock(&fs_info->unused_bgs_lock); +} + int btrfs_init_space_info(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *space_info; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index d78ae10d0446..2299bfde39ee 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -45,7 +45,7 @@ static struct btrfs_block_group_cache *init_test_block_group(void) spin_lock_init(&cache->lock); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + INIT_LIST_HEAD(&cache->bg_list); btrfs_init_free_space_ctl(cache); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 63e632746d8a..f27c0f7c387e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2568,23 +2568,114 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 chunk_offset) +{ + struct extent_map_tree *em_tree; + struct extent_map *em; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct map_lookup *map; + u64 dev_extent_len = 0; + u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + u64 chunk_tree = root->fs_info->chunk_root->objectid; + int i, ret = 0; + + /* Just in case */ + root = root->fs_info->chunk_root; + em_tree = &root->fs_info->mapping_tree.map_tree; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + + if (!em || em->start > chunk_offset || + em->start + em->len < chunk_offset) { + /* + * This is a logic error, but we don't want to just rely on the + * user having built with ASSERT enabled, so if ASSERT doens't + * do anything we still error out. + */ + ASSERT(0); + if (em) + free_extent_map(em); + return -EINVAL; + } + map = (struct map_lookup *)em->bdev; + + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *device = map->stripes[i].dev; + ret = btrfs_free_dev_extent(trans, device, + map->stripes[i].physical, + &dev_extent_len); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } + + if (device->bytes_used > 0) { + lock_chunks(root); + btrfs_device_set_bytes_used(device, + device->bytes_used - dev_extent_len); + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += dev_extent_len; + spin_unlock(&root->fs_info->free_chunk_lock); + btrfs_clear_space_info_full(root->fs_info); + unlock_chunks(root); + } + + if (map->stripes[i].dev) { + ret = btrfs_update_device(trans, map->stripes[i].dev); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } + } + } + ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, + chunk_offset); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } + + trace_btrfs_chunk_free(root, map, chunk_offset, em->len); + + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } + } + + ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); + if (ret) { + btrfs_abort_transaction(trans, extent_root, ret); + goto out; + } + + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + + /* once for the tree */ + free_extent_map(em); +out: + /* once for us */ + free_extent_map(em); + return ret; +} + static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset) { - struct extent_map_tree *em_tree; struct btrfs_root *extent_root; struct btrfs_trans_handle *trans; - struct btrfs_device *device; - struct extent_map *em; - struct map_lookup *map; - u64 dev_extent_len = 0; int ret; - int i; root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; - em_tree = &root->fs_info->mapping_tree.map_tree; ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) @@ -2606,63 +2697,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, * step two, delete the device extents and the * chunk tree entries */ - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, chunk_offset, 1); - read_unlock(&em_tree->lock); - - BUG_ON(!em || em->start > chunk_offset || - em->start + em->len < chunk_offset); - map = (struct map_lookup *)em->bdev; - - for (i = 0; i < map->num_stripes; i++) { - device = map->stripes[i].dev; - ret = btrfs_free_dev_extent(trans, device, - map->stripes[i].physical, - &dev_extent_len); - BUG_ON(ret); - - if (device->bytes_used > 0) { - lock_chunks(root); - btrfs_device_set_bytes_used(device, - device->bytes_used - dev_extent_len); - spin_lock(&root->fs_info->free_chunk_lock); - root->fs_info->free_chunk_space += dev_extent_len; - spin_unlock(&root->fs_info->free_chunk_lock); - btrfs_clear_space_info_full(root->fs_info); - unlock_chunks(root); - } - - if (map->stripes[i].dev) { - ret = btrfs_update_device(trans, map->stripes[i].dev); - BUG_ON(ret); - } - } - ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, - chunk_offset); - - BUG_ON(ret); - - trace_btrfs_chunk_free(root, map, chunk_offset, em->len); - - if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); - BUG_ON(ret); - } - - ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); - BUG_ON(ret); - - write_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - - /* once for the tree */ - free_extent_map(em); - /* once for us */ - free_extent_map(em); - + ret = btrfs_remove_chunk(trans, root, chunk_offset); btrfs_end_transaction(trans, root); - return 0; + return ret; } static int btrfs_relocate_sys_chunks(struct btrfs_root *root) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 91998bc0b4c4..08980fa23039 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -463,6 +463,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 chunk_offset, u64 chunk_size); +int btrfs_remove_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 chunk_offset); static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev) {