diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19c0dd33b1e8..c0c95cccbb5b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1512,7 +1512,8 @@ cow_done: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(b) >= + if ((p->search_for_split || ins_len > 0) && + btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); @@ -1596,7 +1597,8 @@ cow_done: goto done; } } - unlock_up(p, level, lowest_unlock); + if (!p->search_for_split) + unlock_up(p, level, lowest_unlock); goto done; } } @@ -2636,11 +2638,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int num_doubles = 0; struct btrfs_disk_key disk_key; - if (extend) + if (extend && data_size) space_needed = data_size; /* first try to make some room by pushing left and right */ - if (ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) { return wret; @@ -2721,7 +2723,7 @@ again: } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { - if (!extend && slot == 0) { + if (!extend && data_size && slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2742,7 +2744,7 @@ again: } btrfs_mark_buffer_dirty(right); return ret; - } else if (extend && slot == 0) { + } else if ((extend || !data_size) && slot == 0) { mid = 1; } else { mid = slot; @@ -2827,6 +2829,123 @@ again: return ret; } +/* + * This function splits a single item into two items, + * giving 'new_key' to the new item and splitting the + * old one at split_offset (from the start of the item). + * + * The path may be released by this operation. After + * the split, the path is pointing to the old item. The + * new item is going to be in the same node as the old one. + * + * Note, the item being split must be smaller enough to live alone on + * a tree block with room for one extra struct btrfs_item + * + * This allows us to split the item in place, keeping a lock on the + * leaf the entire time. + */ +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset) +{ + u32 item_size; + struct extent_buffer *leaf; + struct btrfs_key orig_key; + struct btrfs_item *item; + struct btrfs_item *new_item; + int ret = 0; + int slot; + u32 nritems; + u32 orig_offset; + struct btrfs_disk_key disk_key; + char *buf; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); + if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) + goto split; + + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + btrfs_release_path(root, path); + + path->search_for_split = 1; + path->keep_locks = 1; + + ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); + path->search_for_split = 0; + + /* if our item isn't there or got smaller, return now */ + if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], + path->slots[0])) { + path->keep_locks = 0; + return -EAGAIN; + } + + ret = split_leaf(trans, root, &orig_key, path, 0, 0); + path->keep_locks = 0; + BUG_ON(ret); + + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + leaf = path->nodes[0]; + +split: + item = btrfs_item_nr(leaf, path->slots[0]); + orig_offset = btrfs_item_offset(leaf, item); + item_size = btrfs_item_size(leaf, item); + + + buf = kmalloc(item_size, GFP_NOFS); + read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, + path->slots[0]), item_size); + slot = path->slots[0] + 1; + leaf = path->nodes[0]; + + nritems = btrfs_header_nritems(leaf); + + if (slot != nritems) { + /* shift the items */ + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), + (nritems - slot) * sizeof(struct btrfs_item)); + + } + + btrfs_cpu_key_to_disk(&disk_key, new_key); + btrfs_set_item_key(leaf, &disk_key, slot); + + new_item = btrfs_item_nr(leaf, slot); + + btrfs_set_item_offset(leaf, new_item, orig_offset); + btrfs_set_item_size(leaf, new_item, item_size - split_offset); + + btrfs_set_item_offset(leaf, item, + orig_offset + item_size - split_offset); + btrfs_set_item_size(leaf, item, split_offset); + + btrfs_set_header_nritems(leaf, nritems + 1); + + /* write the data for the start of the original item */ + write_extent_buffer(leaf, buf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + split_offset); + + /* write the data for the new item */ + write_extent_buffer(leaf, buf + split_offset, + btrfs_item_ptr_offset(leaf, slot), + item_size - split_offset); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); + BUG(); + } + kfree(buf); + return ret; +} + /* * make the item pointed to by the path smaller. new_size indicates * how small to make it, and from_end tells us if we just chop bytes diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f72b43819349..5b0c79d22c09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -409,6 +409,12 @@ struct btrfs_path { int keep_locks; int skip_locking; int lowest_level; + + /* + * set by btrfs_split_item, tells search_slot to keep all locks + * and to force calls to keep space in the nodes + */ + int search_for_split; }; /* @@ -1816,6 +1822,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 new_size, int from_end); +int btrfs_split_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *new_key, + unsigned long split_offset); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1953,6 +1964,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 803647bc8400..cc74316dc426 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2484,7 +2484,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free = 1; BUG_ON(ret < 0); } - /* block accounting for super block */ spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); @@ -2504,6 +2503,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, mark_free); BUG_ON(ret); + if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_del_csums(trans, root, bytenr, num_bytes); + BUG_ON(ret); + } + #ifdef BIO_RW_DISCARD /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a3ad2ce00116..3ebef871ee6c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -310,6 +310,179 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, return 0; } +/* + * helper function for csum removal, this expects the + * key to describe the csum pointed to by the path, and it expects + * the csum to overlap the range [bytenr, len] + * + * The csum should not be entirely contained in the range and the + * range should not be entirely contained in the csum. + * + * This calls btrfs_truncate_item with the correct args based on the + * overlap, and fixes up the key as required. + */ +static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + u64 bytenr, u64 len) +{ + struct extent_buffer *leaf; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + u64 csum_end; + u64 end_byte = bytenr + len; + u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; + int ret; + + leaf = path->nodes[0]; + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= root->fs_info->sb->s_blocksize_bits; + csum_end += key->offset; + + if (key->offset < bytenr && csum_end <= end_byte) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * A simple truncate off the end of the item + */ + u32 new_size = (bytenr - key->offset) >> blocksize_bits; + new_size *= csum_size; + ret = btrfs_truncate_item(trans, root, path, new_size, 1); + BUG_ON(ret); + } else if (key->offset >= bytenr && csum_end > end_byte && + end_byte > key->offset) { + /* + * [ bytenr - len ] + * [ ] + * [csum ] + * we need to truncate from the beginning of the csum + */ + u32 new_size = (csum_end - end_byte) >> blocksize_bits; + new_size *= csum_size; + + ret = btrfs_truncate_item(trans, root, path, new_size, 0); + BUG_ON(ret); + + key->offset = end_byte; + ret = btrfs_set_item_key_safe(trans, root, path, key); + BUG_ON(ret); + } else { + BUG(); + } + return 0; +} + +/* + * deletes the csum items from the csum tree for a given + * range of bytes. + */ +int btrfs_del_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, u64 len) +{ + struct btrfs_path *path; + struct btrfs_key key; + u64 end_byte = bytenr + len; + u64 csum_end; + struct extent_buffer *leaf; + int ret; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + int blocksize_bits = root->fs_info->sb->s_blocksize_bits; + + root = root->fs_info->csum_root; + + path = btrfs_alloc_path(); + + while(1) { + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.offset = end_byte - 1; + key.type = BTRFS_EXTENT_CSUM_KEY; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + if (path->slots[0] == 0) + goto out; + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + key.type != BTRFS_EXTENT_CSUM_KEY) { + break; + } + + if (key.offset >= end_byte) + break; + + csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; + csum_end <<= blocksize_bits; + csum_end += key.offset; + + /* this csum ends before we start, we're done */ + if (csum_end <= bytenr) + break; + + /* delete the entire item, it is inside our range */ + if (key.offset >= bytenr && csum_end <= end_byte) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else if (key.offset < bytenr && csum_end > end_byte) { + unsigned long offset; + unsigned long shift_len; + unsigned long item_offset; + /* + * [ bytenr - len ] + * [csum ] + * + * Our bytes are in the middle of the csum, + * we need to split this item and insert a new one. + * + * But we can't drop the path because the + * csum could change, get removed, extended etc. + * + * The trick here is the max size of a csum item leaves + * enough room in the tree block for a single + * item header. So, we split the item in place, + * adding a new header pointing to the existing + * bytes. Then we loop around again and we have + * a nicely formed csum item that we can neatly + * truncate. + */ + offset = (bytenr - key.offset) >> blocksize_bits; + offset *= csum_size; + + shift_len = (len >> blocksize_bits) * csum_size; + + item_offset = btrfs_item_ptr_offset(leaf, + path->slots[0]); + + memset_extent_buffer(leaf, 0, item_offset + offset, + shift_len); + key.offset = bytenr; + + /* + * btrfs_split_item returns -EAGAIN when the + * item changed size or key + */ + ret = btrfs_split_item(trans, root, path, &key, offset); + BUG_ON(ret && ret != -EAGAIN); + + key.offset = end_byte - 1; + } else { + ret = truncate_one_csum(trans, root, path, + &key, bytenr, len); + BUG_ON(ret); + } + btrfs_release_path(root, path); + } +out: + btrfs_free_path(path); + return 0; +} + int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) @@ -396,28 +569,40 @@ again: csum_size, 1); if (ret < 0) goto fail_unlock; - if (ret == 0) { - BUG(); + + if (ret > 0) { + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; } - if (path->slots[0] == 0) { - goto insert; - } - path->slots[0]--; + leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = (bytenr - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; + if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { goto insert; } + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / csum_size) { u32 diff = (csum_offset + 1) * csum_size; + + /* + * is the item big enough already? we dropped our lock + * before and need to recheck + */ + if (diff < btrfs_item_size_nr(leaf, path->slots[0])) + goto csum; + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); - if (diff != csum_size) + if (diff != csum_size) { goto insert; + } + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -518,30 +703,3 @@ out: fail_unlock: goto out; } - -int btrfs_csum_truncate(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 isize) -{ - struct btrfs_key key; - struct extent_buffer *leaf = path->nodes[0]; - int slot = path->slots[0]; - int ret; - u32 new_item_size; - u64 new_item_span; - u64 blocks; - - btrfs_item_key_to_cpu(leaf, &key, slot); - if (isize <= key.offset) - return 0; - new_item_span = isize - key.offset; - blocks = (new_item_span + root->sectorsize - 1) >> - root->fs_info->sb->s_blocksize_bits; - new_item_size = blocks * - btrfs_super_csum_size(&root->fs_info->super_copy); - if (new_item_size >= btrfs_item_size_nr(leaf, slot)) - return 0; - ret = btrfs_truncate_item(trans, root, path, new_item_size, 1); - BUG_ON(ret); - return ret; -}