diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ad7274137309..2436163d5436 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) + if (btrfs_test_flag(inode, NODATASUM)) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -671,8 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, */ atomic_inc(&cb->pending_bios); - if (!btrfs_test_opt(root, NODATASUM) && - !btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } @@ -699,8 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_opt(root, NODATASUM) && - !btrfs_test_flag(inode, NODATASUM)) { + if (!btrfs_test_flag(inode, NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8733081d97a3..b89999de4564 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1702,7 +1702,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr); + struct btrfs_root *root, u64 objectid, u64 bytenr); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); @@ -1789,6 +1789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, struct extent_buffer *buf, u64 orig_start); int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); +int btrfs_reloc_clone_csums(struct 
inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, @@ -1994,6 +1995,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); +int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, + u64 end, struct list_head *list); /* inode.c */ /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 171057a32679..8004695d24d6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1359,7 +1359,7 @@ out: } int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr) + struct btrfs_root *root, u64 objectid, u64 bytenr) { struct btrfs_root *extent_root = root->fs_info->extent_root; struct btrfs_path *path; @@ -1418,8 +1418,9 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, ref_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); ref_root = btrfs_ref_root(leaf, ref_item); - if (ref_root != root->root_key.objectid && - ref_root != BTRFS_TREE_LOG_OBJECTID) { + if ((ref_root != root->root_key.objectid && + ref_root != BTRFS_TREE_LOG_OBJECTID) || + objectid != btrfs_ref_objectid(leaf, ref_item)) { ret = 1; goto out; } @@ -5367,7 +5368,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, if (ret) goto out; } - btrfs_record_root_in_trans(found_root); ret = replace_one_extent(trans, found_root, path, extent_key, &first_key, ref_path, @@ -5534,6 +5534,7 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info, } else { BUG_ON(1); } + BTRFS_I(inode)->index_cnt = group->key.objectid; err = btrfs_orphan_add(trans, inode); out: @@ -5546,6 +5547,47 @@ out: return inode; } +int btrfs_reloc_clone_csums(struct inode 
*inode, u64 file_pos, u64 len) +{ + + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct btrfs_ordered_extent *ordered; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct list_head list; + size_t offset; + int ret; + u64 disk_bytenr; + + INIT_LIST_HEAD(&list); + + ordered = btrfs_lookup_ordered_extent(inode, file_pos); + BUG_ON(ordered->file_offset != file_pos || ordered->len != len); + + disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; + ret = btrfs_lookup_csums_range(root, disk_bytenr, + disk_bytenr + len - 1, &list); + + while (!list_empty(&list)) { + sums = list_entry(list.next, struct btrfs_ordered_sum, list); + list_del_init(&sums->list); + + sector_sum = sums->sums; + sums->bytenr = ordered->start; + + offset = 0; + while (offset < sums->len) { + sector_sum->bytenr += ordered->start - disk_bytenr; + sector_sum++; + offset += root->sectorsize; + } + + btrfs_add_ordered_sum(inode, ordered, sums); + } + btrfs_put_ordered_extent(ordered); + return ret; +} + int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2d5f67065b69..c5b483a79137 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -16,6 +16,7 @@ #define EXTENT_ORDERED (1 << 9) #define EXTENT_ORDERED_METADATA (1 << 10) #define EXTENT_BOUNDARY (1 << 11) +#define EXTENT_NODATASUM (1 << 12) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3ebef871ee6c..df0447632dbd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -140,6 +140,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } + int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst) { @@ -185,9 +186,16 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, if (ret == 
-EFBIG) ret = 0; sum = 0; - printk("no csum found for inode %lu start " - "%llu\n", inode->i_ino, - (unsigned long long)offset); + if (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + set_extent_bits(io_tree, offset, + offset + bvec->bv_len - 1, + EXTENT_NODATASUM, GFP_NOFS); + } else { + printk("no csum found for inode %lu " + "start %llu\n", inode->i_ino, + (unsigned long long)offset); + } item = NULL; btrfs_release_path(root, path); goto found; @@ -228,6 +236,106 @@ found: return 0; } +int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, + struct list_head *list) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_ordered_sum *sums; + struct btrfs_sector_sum *sector_sum; + struct btrfs_csum_item *item; + unsigned long offset; + int ret; + size_t size; + u64 csum_end; + u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + + path = btrfs_alloc_path(); + BUG_ON(!path); + + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = start; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(NULL, root->fs_info->csum_root, + &key, path, 0, 0); + if (ret < 0) + goto fail; + if (ret > 0 && path->slots[0] > 0) { + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); + if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && + key.type == BTRFS_EXTENT_CSUM_KEY) { + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + if (offset * csum_size < + btrfs_item_size_nr(leaf, path->slots[0] - 1)) + path->slots[0]--; + } + } + + while (start <= end) { + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root->fs_info->csum_root, path); + if (ret < 0) + goto fail; + if (ret > 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) + break; + 
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.offset > end) + break; + + if (key.offset > start) + start = key.offset; + + size = btrfs_item_size_nr(leaf, path->slots[0]); + csum_end = key.offset + (size / csum_size) * root->sectorsize; + + size = min(csum_end, end + 1) - start; + sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS); + BUG_ON(!sums); + + sector_sum = sums->sums; + sums->bytenr = start; + sums->len = size; + + offset = (start - key.offset) >> + root->fs_info->sb->s_blocksize_bits; + offset *= csum_size; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_csum_item); + while (size > 0) { + read_extent_buffer(path->nodes[0], &sector_sum->sum, + ((unsigned long)item) + offset, + csum_size); + sector_sum->bytenr = start; + + size -= root->sectorsize; + start += root->sectorsize; + offset += csum_size; + sector_sum++; + } + list_add_tail(&sums->list, list); + + path->slots[0]++; + } + ret = 0; +fail: + btrfs_free_path(path); + return ret; +} + int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71bfe3a6a444..507081059d97 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1059,14 +1059,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; - /* - * if this is a nodatasum mount, force summing off for the inode - * all the time. 
That way a later mount with summing on won't - * get confused - */ - if (btrfs_test_opt(root, NODATASUM)) - btrfs_set_flag(inode, NODATASUM); - /* * there are lots of better ways to do this, but this code * makes sure the first and last page in the file range are diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a28b7706314..e64a4fe19a60 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -771,6 +771,13 @@ static noinline int cow_file_range(struct inode *inode, ram_size, cur_alloc_size, 0); BUG_ON(ret); + if (root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_reloc_clone_csums(inode, start, + cur_alloc_size); + BUG_ON(ret); + } + if (disk_num_bytes < cur_alloc_size) { printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, cur_alloc_size); @@ -910,6 +917,26 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, return 0; } +static int noinline csum_exist_in_range(struct btrfs_root *root, + u64 bytenr, u64 num_bytes) +{ + int ret; + struct btrfs_ordered_sum *sums; + LIST_HEAD(list); + + ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1, + &list); + if (ret == 0 && list_empty(&list)) + return 0; + + while (!list_empty(&list)) { + sums = list_entry(list.next, struct btrfs_ordered_sum, list); + list_del(&sums->list); + kfree(sums); + } + return 1; +} + /* * when nowcow writeback call back. This checks for snapshots or COW copies * of the extents that exist in the file, and COWs the file as required. 
@@ -971,6 +998,7 @@ next_slot: nocow = 0; disk_bytenr = 0; + num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid > inode->i_ino || @@ -996,19 +1024,29 @@ next_slot: path->slots[0]++; goto next_slot; } + if (disk_bytenr == 0) + goto out_check; if (btrfs_file_extent_compression(leaf, fi) || btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) goto out_check; - if (disk_bytenr == 0) - goto out_check; if (extent_type == BTRFS_FILE_EXTENT_REG && !force) goto out_check; - if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) - goto out_check; if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; + if (btrfs_cross_ref_exist(trans, root, inode->i_ino, + disk_bytenr)) + goto out_check; disk_bytenr += btrfs_file_extent_offset(leaf, fi); + disk_bytenr += cur_offset - found_key.offset; + num_bytes = min(end + 1, extent_end) - cur_offset; + /* + * force cow if csum exists in the range. + * this ensure that csum for a given extent are + * either valid or do not exist. 
+ */ + if (csum_exist_in_range(root, disk_bytenr, num_bytes)) + goto out_check; nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + @@ -1041,8 +1079,6 @@ out_check: cow_start = (u64)-1; } - disk_bytenr += cur_offset - found_key.offset; - num_bytes = min(end + 1, extent_end) - cur_offset; if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { struct extent_map *em; struct extent_map_tree *em_tree; @@ -1105,11 +1141,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written) { - struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - if (btrfs_test_opt(root, NODATACOW) || - btrfs_test_flag(inode, NODATACOW)) + if (btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); else if (btrfs_test_flag(inode, PREALLOC)) @@ -1252,8 +1286,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - skip_sum = btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM); + skip_sum = btrfs_test_flag(inode, NODATASUM); if (!(rw & (1 << BIO_RW))) { if (bio_flags & EXTENT_BIO_COMPRESSED) { @@ -1263,6 +1296,9 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, btrfs_lookup_bio_sums(root, inode, bio, NULL); goto mapit; } else if (!skip_sum) { + /* csum items have already been cloned */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + goto mapit; /* we're doing a write, do the async checksumming */ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, @@ -1692,10 +1728,16 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, ClearPageChecked(page); goto good; } - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) + if (btrfs_test_flag(inode, NODATASUM)) return 0; + if 
(root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && + test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { + clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, + GFP_NOFS); + return 0; + } + if (state && state->start == start) { private = state->private; ret = 0; @@ -3391,6 +3433,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); + if (S_ISREG(mode)) { + if (btrfs_test_opt(root, NODATASUM)) + btrfs_set_flag(inode, NODATASUM); + if (btrfs_test_opt(root, NODATACOW)) + btrfs_set_flag(inode, NODATACOW); + } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);