From ccd467d60e81b48cdbecae93532b66bcdedca91d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jun 2007 15:57:36 -0400 Subject: [PATCH] Btrfs: crash recovery fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 13 +++++++++- fs/btrfs/ctree.h | 12 ++++------ fs/btrfs/disk-io.c | 22 ++++++++++------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 37 +++++++++++++++++++++++++---- fs/btrfs/file.c | 15 ++++++++---- fs/btrfs/inode.c | 6 ++--- fs/btrfs/transaction.c | 54 ++++++++++++++++++++++++++---------------- 8 files changed, 112 insertions(+), 48 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 606a19b5916d..9eb646529edc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -75,6 +75,17 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_node *cow_node; int ret; + WARN_ON(!buffer_uptodate(buf)); + if (trans->transaction != root->fs_info->running_transaction) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->running_transaction->transid); + WARN_ON(1); + } + if (trans->transid != root->fs_info->generation) { + printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, + root->fs_info->generation); + WARN_ON(1); + } if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { *cow_ret = buf; @@ -107,7 +118,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1998f86df08a..0287bd51d87c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1013,18 +1013,13 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } -static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) -{ - WARN_ON(!atomic_read(&bh->b_count)); - mark_buffer_dirty(bh); -} - /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 blocknr); @@ -1044,8 +1039,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7081729d5b16..d1bf5bc1bc14 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -270,14 +270,6 @@ fail: return NULL; } -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - WARN_ON(atomic_read(&buf->b_count) == 0); - mark_buffer_dirty(buf); - return 0; -} - int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { @@ -621,6 +613,20 @@ int close_ctree(struct btrfs_root *root) return 0; } +void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { + printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + (unsigned long long)bh->b_blocknr, + transid, root->fs_info->generation); + WARN_ON(1); + } + mark_buffer_dirty(bh); +} + void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { brelse(buf); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c4a695ac44f6..9e2c261b41ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -78,4 +78,5 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); +void btrfs_mark_buffer_dirty(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 01dc30579287..14b93268920e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -523,6 +523,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, } return 0; fail: + WARN_ON(1); for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; @@ -572,7 +573,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -739,8 +740,30 @@ static int try_remove_page(struct address_space *mapping, unsigned long index) return ret; } -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root) +int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy) +{ + unsigned long gang[8]; + u64 last = 0; + struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + int ret; + int i; + + while(1) { + ret = find_first_radix_bit(pinned_radix, gang, last, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0 ; i < ret; i++) { + set_radix_bit(copy, gang[i]); + last = gang[i] + 1; + } + } + return 0; +} + +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *unpin_radix) { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; @@ -752,7 +775,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, 0, + ret = find_first_radix_bit(unpin_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -760,6 +783,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + clear_radix_bit(unpin_radix, gang[i]); block_group = btrfs_lookup_block_group(root->fs_info, gang[i]); if (block_group) { @@ -1309,6 +1333,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (data) { ret = find_free_extent(trans, root, 0, 0, search_end, 0, &prealloc_key, 0, 0, 0); + BUG_ON(ret); if (ret) return ret; exclude_nr = info->extent_tree_prealloc_nr; @@ -1319,6 +1344,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, exclude_start, exclude_nr, data); + BUG_ON(ret); if (ret) return ret; @@ -1334,10 +1360,12 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (!data) { exclude_start = ins->objectid; exclude_nr = ins->offset; + hint_block = exclude_start + exclude_nr; ret = find_free_extent(trans, root, 0, search_start, search_end, hint_block, &prealloc_key, exclude_start, exclude_nr, 0); + BUG_ON(ret); if (ret) return ret; } @@ -1348,6 +1376,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); + BUG_ON(ret); finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fef7ba1e707f..2456cc3e1cfd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -127,7 +127,7 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ptr, kaddr + bh_offset(bh), size); kunmap_atomic(kaddr, KM_USER0); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); ret = btrfs_end_transaction(trans, root); @@ -211,11 +211,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_type; int found_extent; int found_inline; + int recow; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { + recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); @@ -244,6 +246,10 @@ next_slot: if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } + if (recow) { + search_start = key.offset; + continue; + } if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); @@ -274,6 +280,7 @@ next_slot: nextret = btrfs_next_leaf(root, path); if (nextret) goto out; + recow = 1; } else { path->slots[0]++; } @@ -321,7 +328,7 @@ next_slot: } btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); } @@ -452,6 +459,8 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); } mutex_lock(&root->fs_info->fs_mutex); @@ -522,8 +531,6 @@ static int prepare_pages(struct btrfs_root *root, mutex_unlock(&root->fs_info->fs_mutex); for (i = 0; i < num_pages; i++) { - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eba06e7cf414..4fc0367d54f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -506,7 +506,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_num_blocks); inode->i_blocks -= (orig_num_blocks - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { extent_start = btrfs_file_extent_disk_blocknr(fi); @@ -2020,7 +2020,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); + btrfs_mark_buffer_dirty(subvol); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2497,7 +2497,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ptr = btrfs_file_extent_inline_start(ei); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3b2face593e9..bec38ae8aa11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -85,6 +85,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { + WARN_ON(root == root->fs_info->extent_root); + WARN_ON(root->ref_cows != 1); if (root->root_item.refs != 0) { radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, @@ -113,10 +115,11 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans != trans->transaction); WARN_ON(cur_trans->num_writers < 1); + cur_trans->num_writers--; if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); @@ -194,6 +197,7 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); + mutex_lock(&root->fs_info->trans_mutex); while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -203,6 +207,7 @@ static int wait_for_commit(struct btrfs_root *root, schedule(); mutex_lock(&root->fs_info->trans_mutex); } + mutex_unlock(&root->fs_info->trans_mutex); finish_wait(&commit->commit_wait, &wait); return 0; } @@ -279,7 +284,6 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, &root->root_item); if (err) break; - refs = btrfs_root_refs(&tmp_item); btrfs_set_root_refs(&tmp_item, refs - 1); err = btrfs_update_root(trans, root->fs_info->tree_root, @@ -333,31 +337,53 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; + struct radix_tree_root pinned_copy; DEFINE_WAIT(wait); + init_bit_radix(&pinned_copy); INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { cur_trans = trans->transaction; trans->transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); + + mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->fs_mutex); return 0; } - cur_trans = trans->transaction; trans->transaction->in_commit = 1; + cur_trans = trans->transaction; + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (!prev_trans->commit_done) { + prev_trans->use_count++; + mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + } + } while (trans->transaction->num_writers > 1) { WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) break; + mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); schedule(); + mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&trans->transaction->writer_wait, &wait); } @@ -372,34 +398,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - if (cur_trans->list.prev != &root->fs_info->trans_list) { - prev_trans = list_entry(cur_trans->list.prev, - struct btrfs_transaction, list); - if (prev_trans->commit_done) - prev_trans = NULL; - else - prev_trans->use_count++; - } btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, bh_blocknr(root->fs_info->tree_root->node)); memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); + + btrfs_copy_pinned(root, &pinned_copy); + mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); - if (prev_trans) { - mutex_lock(&root->fs_info->trans_mutex); - wait_for_commit(root, prev_trans); - put_transaction(prev_trans); - mutex_unlock(&root->fs_info->trans_mutex); - } BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); - btrfs_finish_extent_commit(trans, root); + btrfs_finish_extent_commit(trans, root, &pinned_copy); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; wake_up(&cur_trans->commit_wait);