diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 84a6a5b3384a..90e70e21e479 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1408,6 +1408,17 @@ struct btrfs_block_group_cache { struct btrfs_io_ctl io_ctl; + /* + * Incremented when doing extent allocations and holding a read lock + * on the space_info's groups_sem semaphore. + * Decremented when an ordered extent that represents an IO against this + * block group's range is created (after it's added to its inode's + * root's list of ordered extents) or immediately after the allocation + * if it's a metadata extent or fallocate extent (for these cases we + * don't create ordered extents). + */ + atomic_t reservations; + /* Lock for free space tree operations. */ struct mutex free_space_lock; @@ -3499,6 +3510,9 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, + const u64 start); +void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 251452a2b72c..09aad7b447f5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6175,6 +6175,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, return 0; } +static void +btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg) +{ + atomic_inc(&bg->reservations); +} + +void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, + const u64 start) +{ + struct btrfs_block_group_cache *bg; + + bg = btrfs_lookup_block_group(fs_info, start); + ASSERT(bg); + if (atomic_dec_and_test(&bg->reservations)) + wake_up_atomic_t(&bg->reservations); + btrfs_put_block_group(bg); +} + +static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) +{ + schedule(); + return 0; +} + +void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) +{ + struct btrfs_space_info *space_info = bg->space_info; + + ASSERT(bg->ro); + + if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) + return; + + /* + * Our block group is read only but before we set it to read only, + * some task might have had allocated an extent from it already, but it + * has not yet created a respective ordered extent (and added it to a + * root's list of ordered extents). + * Therefore wait for any task currently allocating extents, since the + * block group's reservations counter is incremented while a read lock + * on the groups' semaphore is held and decremented after releasing + * the read access on that semaphore and creating the ordered extent. + */ + down_write(&space_info->groups_sem); + up_write(&space_info->groups_sem); + + wait_on_atomic_t(&bg->reservations, + btrfs_wait_bg_reservations_atomic_t, + TASK_UNINTERRUPTIBLE); +} + /** * btrfs_update_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating @@ -7434,6 +7485,7 @@ checks: btrfs_add_free_space(block_group, offset, num_bytes); goto loop; } + btrfs_inc_block_group_reservations(block_group); /* we are all good, lets return */ ins->objectid = search_start; @@ -7615,8 +7667,10 @@ again: WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, flags, delalloc); - - if (ret == -ENOSPC) { + if (!ret && !is_data) { + btrfs_dec_block_group_reservations(root->fs_info, + ins->objectid); + } else if (ret == -ENOSPC) { if (!final_tried && ins->offset) { num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2aaba58b4856..3991d8a74f61 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -824,6 +824,7 @@ retry: async_extent->ram_size - 1, 0); goto out_free_reserve; } + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); /* * clear dirty, set writeback and unlock the pages. @@ -861,6 +862,7 @@ retry: } return; out_free_reserve: + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_free: extent_clear_unlock_delalloc(inode, async_extent->start, @@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode, goto out_drop_extent_cache; } + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); + if (disk_num_bytes < cur_alloc_size) break; @@ -1066,6 +1070,7 @@ out: out_drop_extent_cache: btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); out_reserve: + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_unlock: extent_clear_unlock_delalloc(inode, start, end, locked_page, @@ -7162,6 +7167,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, return ERR_PTR(ret); } + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); + em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, ins.offset, 0); if (IS_ERR(em)) { @@ -9942,6 +9949,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans, root); break; } + btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); last_alloc = ins.offset; ret = insert_reserved_file_extent(trans, inode, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 30f77ed60133..e78f8e44bd9a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4254,11 +4254,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", rc->block_group->key.objectid, rc->block_group->flags); - ret = btrfs_start_delalloc_roots(fs_info, 0, -1); - if (ret < 0) { - err = ret; - goto out; - } + btrfs_wait_block_group_reservations(rc->block_group); btrfs_wait_ordered_roots(fs_info, -1, rc->block_group->key.objectid, rc->block_group->key.offset);