diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 289d71d8653a..02e571e6ee62 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -379,7 +379,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) - uptodate = btrfs_buffer_uptodate(cur); + uptodate = btrfs_buffer_uptodate(cur, gen); else uptodate = 0; if (!cur || !uptodate) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edee7a44f861..574b1245964e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + goto out; + } + printk("parent transid verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + ret = 1; +out: + clear_extent_buffer_uptodate(io_tree, eb); + unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + return ret; + +} + static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) + if (!ret && + !verify_parent_transid(io_tree, eb, parent_transid)) return ret; num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, @@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } + if (memcmp_extent_buffer(eb, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE)) { + printk("bad fsid on block %Lu\n", eb->start); + ret = -EIO; + goto err; + } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); @@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) "I/O error on %s\n", bdevname(bh->b_bdev, b)); } - set_buffer_write_io_error(bh); + /* note, we dont' set_buffer_write_io_error because we have + * our own ways of dealing with the IO errors + */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root) ret = submit_bh(WRITE, bh); BUG_ON(ret); wait_on_buffer(bh); - BUG_ON(!buffer_uptodate(bh)); + if (!buffer_uptodate(bh)) + total_errors++; } else { total_errors++; } @@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root) return 0; } -int btrfs_buffer_uptodate(struct extent_buffer *buf) +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) { + int ret; struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + if (!ret) + return ret; + + ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, + parent_transid); + return !ret; } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index e29c895d5230..30d1ed293c25 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -56,7 +56,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); -int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db07dde4a870..605018c6045c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1366,7 +1366,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, if (!pending) { buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { - if (btrfs_buffer_uptodate(buf)) { + if (btrfs_buffer_uptodate(buf, 0)) { u64 transid = root->fs_info->running_transaction->transid; u64 header_transid = @@ -2151,7 +2151,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, continue; } next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next)) { + if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dd403b426ff5..2a3624adc0cf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1366,7 +1366,7 @@ static int end_bio_extent_writepage(struct bio *bio, unsigned int bytes_done, int err) #endif { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_state *state = bio->bi_private; struct extent_io_tree *tree = state->tree; @@ -1375,6 +1375,7 @@ static int end_bio_extent_writepage(struct bio *bio, u64 end; u64 cur; int whole_page; + int ret; unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) @@ -1395,17 +1396,30 @@ static int end_bio_extent_writepage(struct bio *bio, if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, state); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(bio, page, + start, end, state); + if (ret == 0) { + state = NULL; + uptodate = (err == 0); + continue; + } + } + if (!uptodate) { clear_extent_uptodate(tree, start, end, GFP_ATOMIC); ClearPageUptodate(page); SetPageError(page); } - if (tree->ops && tree->ops->writepage_end_io_hook) { - tree->ops->writepage_end_io_hook(page, start, end, - state); - } - /* * bios can get merged in funny ways, and so we need to * be careful with the state variable. We know the @@ -2073,9 +2087,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } else { ret = 0; } - if (ret) + if (ret) { SetPageError(page); - else { + } else { unsigned long max_nr = end_index + 1; set_range_writeback(tree, cur, cur + iosize - 1); if (!PageWriteback(page)) { @@ -2948,6 +2962,25 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } EXPORT_SYMBOL(set_extent_buffer_dirty); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + eb->flags &= ~EXTENT_UPTODATE; + + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + ClearPageUptodate(page); + } + return 0; +} + int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e48346147514..f1960dafaa19 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -36,9 +36,12 @@ struct extent_io_ops { int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, struct extent_state *state); + int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, + u64 start, u64 end, + struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); @@ -212,6 +215,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); +int clear_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a492fd238c88..08760ff9bab7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,9 +430,9 @@ struct io_failure_record { int last_mirror; }; -int btrfs_readpage_io_failed_hook(struct bio *failed_bio, - struct page *page, u64 start, u64 end, - struct extent_state *state) +int btrfs_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + struct extent_state *state) { struct io_failure_record *failrec = NULL; u64 private; @@ -443,6 +443,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, struct bio *bio; int num_copies; int ret; + int rw; u64 logical; ret = get_state_private(failure_tree, start, &private); @@ -505,7 +506,41 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, bio->bi_bdev = failed_bio->bi_bdev; bio->bi_size = 0; bio_add_page(bio, page, failrec->len, start - page_offset(page)); - btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror); + if (failed_bio->bi_rw & (1 << BIO_RW)) + rw = WRITE; + else + rw = READ; + + BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, + failrec->last_mirror); + return 0; +} + +int btrfs_clean_io_failures(struct inode *inode, u64 start) +{ + u64 private; + u64 private_failure; + struct io_failure_record *failure; + int ret; + + private = 0; + if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + (u64)-1, 1, EXTENT_DIRTY)) { + ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, + start, &private_failure); + if (ret == 0) { + failure = (struct io_failure_record *)(unsigned long) + private_failure; + set_state_private(&BTRFS_I(inode)->io_failure_tree, + failure->start, 0); + clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, + failure->start, + failure->start + failure->len - 1, + EXTENT_DIRTY | EXTENT_LOCKED, + GFP_NOFS); + kfree(failure); + } + } return 0; } @@ -547,26 +582,7 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, /* if the io failure tree for this inode is non-empty, * check to see if we've recovered from a failed IO */ - private = 0; - if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY)) { - u64 private_failure; - struct io_failure_record *failure; - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, - start, &private_failure); - if (ret == 0) { - failure = (struct io_failure_record *)(unsigned long) - private_failure; - set_state_private(&BTRFS_I(inode)->io_failure_tree, - failure->start, 0); - clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, - failure->start, - failure->start + failure->len - 1, - EXTENT_DIRTY | EXTENT_LOCKED, - GFP_NOFS); - kfree(failure); - } - } + btrfs_clean_io_failures(inode, start); return 0; zeroit: @@ -3657,7 +3673,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { .merge_bio_hook = btrfs_merge_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, - .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, }; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5085e9e693b9..c02e2bf2f028 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -51,6 +51,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; u64 bytenr; + u64 ptr_gen; int ret = 0; int is_extent = 0; @@ -93,11 +94,12 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, bytenr, btrfs_level_size(root, *level - 1)); - if (!next || !btrfs_buffer_uptodate(next) || + if (!next || !btrfs_buffer_uptodate(next, ptr_gen) || !btrfs_buffer_defrag(next)) { free_extent_buffer(next); path->slots[*level]++; @@ -106,8 +108,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, } else { next = read_tree_block(root, bytenr, btrfs_level_size(root, *level - 1), - btrfs_node_ptr_generation(cur, - path->slots[*level])); + ptr_gen); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5d7bd1915b4..5fc7fb481474 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1807,14 +1807,19 @@ static int end_bio_multi_stripe(struct bio *bio, if (atomic_dec_and_test(&multi->stripes_pending)) { bio->bi_private = multi->private; bio->bi_end_io = multi->end_io; - /* only send an error to the higher layers if it is * beyond the tolerance of the multi-bio */ - if (atomic_read(&multi->error) > multi->max_errors) + if (atomic_read(&multi->error) > multi->max_errors) { err = -EIO; - else + } else if (err) { + /* + * this bio is actually up to date, we didn't + * go over the max number of errors + */ + set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; + } kfree(multi); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)