diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index cde698a07d21..a2ae42720a6a 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); + BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); + inode->i_version = btrfs_stack_inode_sequence(inode_item); inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1eef4ee01d1a..0ec8e228b89f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3178,8 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(path); fail: + btrfs_release_path(path); if (ret) btrfs_abort_transaction(trans, root, ret); return ret; @@ -3305,8 +3305,7 @@ again: spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED || - !btrfs_test_opt(root, SPACE_CACHE) || - block_group->delalloc_bytes) { + !btrfs_test_opt(root, SPACE_CACHE)) { /* * don't bother trying to write stuff out _if_ * a) we're not cached, @@ -3408,17 +3407,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); - if (!list_empty(&cur_trans->dirty_bgs)) { - list_splice_init(&cur_trans->dirty_bgs, &dirty); + if (list_empty(&cur_trans->dirty_bgs)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + return 0; } + list_splice_init(&cur_trans->dirty_bgs, &dirty); spin_unlock(&cur_trans->dirty_bgs_lock); again: - if (list_empty(&dirty)) { - btrfs_free_path(path); - return 0; - } - /* * make sure all the block groups on our dirty list actually * exist @@ -3431,18 +3427,16 @@ again: return -ENOMEM; } + /* + * cache_write_mutex is here only to save us from balance or automatic + * removal of empty block groups deleting this block group while we are + * writing out the cache + */ + mutex_lock(&trans->transaction->cache_write_mutex); while (!list_empty(&dirty)) { cache = list_first_entry(&dirty, struct btrfs_block_group_cache, dirty_list); - - /* - * cache_write_mutex is here only to save us from balance - * deleting this block group while we are writing out the - * cache - */ - mutex_lock(&trans->transaction->cache_write_mutex); - /* * this can happen if something re-dirties a block * group that is already under IO. Just wait for it to @@ -3495,7 +3489,6 @@ again: } if (!ret) ret = write_one_cache_group(trans, root, path, cache); - mutex_unlock(&trans->transaction->cache_write_mutex); /* if its not on the io list, we need to put the block group */ if (should_put) @@ -3503,7 +3496,16 @@ again: if (ret) break; + + /* + * Avoid blocking other tasks for too long. It might even save + * us from writing caches for block groups that are going to be + * removed. + */ + mutex_unlock(&trans->transaction->cache_write_mutex); + mutex_lock(&trans->transaction->cache_write_mutex); } + mutex_unlock(&trans->transaction->cache_write_mutex); /* * go through delayed refs for all the stuff we've just kicked off @@ -3514,8 +3516,15 @@ again: loops++; spin_lock(&cur_trans->dirty_bgs_lock); list_splice_init(&cur_trans->dirty_bgs, &dirty); + /* + * dirty_bgs_lock protects us from concurrent block group + * deletes too (not just cache_write_mutex). + */ + if (!list_empty(&dirty)) { + spin_unlock(&cur_trans->dirty_bgs_lock); + goto again; + } spin_unlock(&cur_trans->dirty_bgs_lock); - goto again; } btrfs_free_path(path); @@ -7537,7 +7546,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, * returns the key for the extent through ins, and a tree buffer for * the first block of the extent through buf. * - * returns the tree buffer or NULL. + * returns the tree buffer or an ERR_PTR on error. */ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -7548,6 +7557,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_key ins; struct btrfs_block_rsv *block_rsv; struct extent_buffer *buf; + struct btrfs_delayed_extent_op *extent_op; u64 flags = 0; int ret; u32 blocksize = root->nodesize; @@ -7568,13 +7578,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0, 0); - if (ret) { - unuse_block_rsv(root->fs_info, block_rsv, blocksize); - return ERR_PTR(ret); - } + if (ret) + goto out_unuse; buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); - BUG_ON(IS_ERR(buf)); /* -ENOMEM */ + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_free_reserved; + } if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { if (parent == 0) @@ -7584,9 +7595,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, BUG_ON(parent > 0); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { - struct btrfs_delayed_extent_op *extent_op; extent_op = btrfs_alloc_delayed_extent_op(); - BUG_ON(!extent_op); /* -ENOMEM */ + if (!extent_op) { + ret = -ENOMEM; + goto out_free_buf; + } if (key) memcpy(&extent_op->key, key, sizeof(extent_op->key)); else @@ -7601,13 +7614,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, extent_op->level = level; ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, - ins.objectid, - ins.offset, parent, root_objectid, - level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, 0); - BUG_ON(ret); /* -ENOMEM */ + ins.objectid, ins.offset, + parent, root_objectid, level, + BTRFS_ADD_DELAYED_EXTENT, + extent_op, 0); + if (ret) + goto out_free_delayed; } return buf; + +out_free_delayed: + btrfs_free_delayed_extent_op(extent_op); +out_free_buf: + free_extent_buffer(buf); +out_free_reserved: + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0); +out_unuse: + unuse_block_rsv(root->fs_info, block_rsv, blocksize); + return ERR_PTR(ret); } struct walk_control { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 782f3bc4651d..43af5a61ad25 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) do { index--; page = eb->pages[index]; - if (page && mapped) { + if (!page) + continue; + if (mapped) spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. + * We need to make sure we haven't be attached + * to a new eb. */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ page_cache_release(page); } + + if (mapped) + spin_unlock(&page->mapping->private_lock); + + /* One for when we alloced the page */ + page_cache_release(page); } while (index != 0); } @@ -4870,6 +4871,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, mark_extent_buffer_accessed(exists, p); goto free_eb; } + exists = NULL; /* * Do this so attach doesn't complain and we need to @@ -4933,12 +4935,12 @@ again: return eb; free_eb: + WARN_ON(!atomic_dec_and_test(&eb->refs)); for (i = 0; i < num_pages; i++) { if (eb->pages[i]) unlock_page(eb->pages[i]); } - WARN_ON(!atomic_dec_and_test(&eb->refs)); btrfs_release_extent_buffer(eb); return exists; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 81fa75a8e1f3..41c510b7cc11 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1218,7 +1218,7 @@ out: * * This function writes out a free space cache struct to disk for quick recovery * on mount. This will return 0 if it was successfull in writing the cache out, - * and -1 if it was not. + * or an errno if it was not. */ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, @@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, int must_iput = 0; if (!i_size_read(inode)) - return -1; + return -EIO; WARN_ON(io_ctl->pages); ret = io_ctl_init(io_ctl, inode, root, 1); if (ret) - return -1; + return ret; if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { down_write(&block_group->data_rwsem); @@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, } /* Lock all pages first so we can lock the extent safely. */ - io_ctl_prepare_pages(io_ctl, inode, 0); + ret = io_ctl_prepare_pages(io_ctl, inode, 0); + if (ret) + goto out; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ada4d24ed11b..8bb013672aee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3632,16 +3632,6 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); - /* - * If we were modified in the current generation and evicted from memory - * and then re-read we need to do a full sync since we don't have any - * idea about which extents were modified before we were evicted from - * cache. - */ - if (BTRFS_I(inode)->last_trans == root->fs_info->generation) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; @@ -3651,6 +3641,19 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); cache_index: + /* + * If we were modified in the current generation and evicted from memory + * and then re-read we need to do a full sync since we don't have any + * idea about which extents were modified before we were evicted from + * cache. + * + * This is required for both inode re-read from disk and delayed inode + * in delayed_nodes_tree. + */ + if (BTRFS_I(inode)->last_trans == root->fs_info->generation) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + path->slots[0]++; if (inode->i_nlink != 1 || path->slots[0] >= btrfs_header_nritems(leaf)) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b05653f182c2..1c22c6518504 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, "Attempt to delete subvolume %llu during send", dest->root_key.objectid); err = -EPERM; - goto out_dput; + goto out_unlock_inode; } d_invalidate(dentry); @@ -2505,6 +2505,7 @@ out_up_write: root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); spin_unlock(&dest->root_item_lock); } +out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { shrink_dcache_sb(root->fs_info->sb); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bcd2a007517..96aebf3bcd5b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans, struct extent_map *em; struct list_head *search_list = &trans->transaction->pending_chunks; int ret = 0; + u64 physical_start = *start; again: list_for_each_entry(em, search_list, list) { @@ -1068,9 +1069,9 @@ again: for (i = 0; i < map->num_stripes; i++) { if (map->stripes[i].dev != device) continue; - if (map->stripes[i].physical >= *start + len || + if (map->stripes[i].physical >= physical_start + len || map->stripes[i].physical + em->orig_block_len <= - *start) + physical_start) continue; *start = map->stripes[i].physical + em->orig_block_len; @@ -1193,8 +1194,14 @@ again: */ if (contains_pending_extent(trans, device, &search_start, - hole_size)) - hole_size = 0; + hole_size)) { + if (key.offset >= search_start) { + hole_size = key.offset - search_start; + } else { + WARN_ON_ONCE(1); + hole_size = 0; + } + } if (hole_size > max_hole_size) { max_hole_start = search_start;