Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (27 commits)
  Btrfs: add more error checking to btrfs_dirty_inode
  Btrfs: allow unaligned DIO
  Btrfs: drop verbose enospc printk
  Btrfs: Fix block generation verification race
  Btrfs: fix preallocation and nodatacow checks in O_DIRECT
  Btrfs: avoid ENOSPC errors in btrfs_dirty_inode
  Btrfs: move O_DIRECT space reservation to btrfs_direct_IO
  Btrfs: rework O_DIRECT enospc handling
  Btrfs: use async helpers for DIO write checksumming
  Btrfs: don't walk around with task->state != TASK_RUNNING
  Btrfs: do aio_write instead of write
  Btrfs: add basic DIO read/write support
  direct-io: do not merge logically non-contiguous requests
  direct-io: add a hook for the fs to provide its own submit_bio function
  fs: allow short direct-io reads to be completed via buffered IO
  Btrfs: Metadata ENOSPC handling for balance
  Btrfs: Pre-allocate space for data relocation
  Btrfs: Metadata ENOSPC handling for tree log
  Btrfs: Metadata reservation for orphan inodes
  Btrfs: Introduce global metadata reservation
  ...
This commit is contained in:
Linus Torvalds 2010-05-27 10:43:44 -07:00
commit 105a048a4f
31 changed files with 5095 additions and 2769 deletions

View file

@ -377,6 +377,7 @@ again:
if (!list_empty(&worker->pending) ||
!list_empty(&worker->prio_pending)) {
spin_unlock_irq(&worker->lock);
set_current_state(TASK_RUNNING);
goto again;
}

View file

@ -137,8 +137,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
spinlock_t accounting_lock;
atomic_t outstanding_extents;
int reserved_extents;
int outstanding_extents;
/*
* ordered_data_close is set by truncate when a file that used
@ -151,6 +151,7 @@ struct btrfs_inode {
* of these.
*/
unsigned ordered_data_close:1;
unsigned orphan_meta_reserved:1;
unsigned dummy_inode:1;
/*

View file

@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
struct extent_buffer *cow)
struct extent_buffer *cow,
int *last_ref)
{
u64 refs;
u64 owner;
@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
return 0;
}
@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
int level;
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
update_ref_for_cow(trans, root, buf, cow);
update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (root->ref_cows)
btrfs_reloc_cow_block(trans, root, buf, cow);
if (buf == root->node) {
WARN_ON(parent && parent != buf);
@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf->start, buf->len,
parent_start, root->root_key.objectid, level);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
return bin_search(eb, key, level, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) + size);
spin_unlock(&root->accounting_lock);
}
static void root_sub_used(struct btrfs_root *root, u32 size)
{
spin_lock(&root->accounting_lock);
btrfs_set_root_used(&root->root_item,
btrfs_root_used(&root->root_item) - size);
spin_unlock(&root->accounting_lock);
}
/* given a node and slot number, this reads the blocks it points to. The
* extent buffer is returned with a reference taken (but unlocked).
* NULL is returned on error.
@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
BUG_ON(ret);
if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
goto enospc;
}
spin_lock(&root->node_lock);
root->node = child;
@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
0, root->root_key.objectid, level);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
return ret;
return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
u64 bytenr = right->start;
u32 blocksize = right->len;
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
free_extent_buffer(right);
right = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot +
1);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root,
bytenr, blocksize, 0,
root->root_key.objectid,
level);
if (wret)
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
free_extent_buffer(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
/* we've managed to empty the middle node, drop it */
u64 bytenr = mid->start;
u32 blocksize = mid->len;
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
free_extent_buffer(mid);
mid = NULL;
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
0, root->root_key.objectid, level);
if (wret)
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
free_extent_buffer(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
btrfs_release_path(NULL, p);
ret = -EAGAIN;
tmp = read_tree_block(root, blocknr, blocksize, gen);
tmp = read_tree_block(root, blocknr, blocksize, 0);
if (tmp) {
/*
* If the read above didn't mark this buffer up to date,
@ -1740,7 +1754,6 @@ again:
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) {
free_extent_buffer(b);
ret = err;
goto done;
}
@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
if (IS_ERR(c))
return PTR_ERR(c);
root_add_used(root, root->nodesize);
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
btrfs_set_header_level(c, level);
@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
int nritems;
BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
if (IS_ERR(split))
return PTR_ERR(split);
root_add_used(root, root->nodesize);
memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(split, btrfs_header_level(c));
btrfs_set_header_bytenr(split, split->start);
@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
clean_tree_block(trans, root, left);
btrfs_mark_buffer_dirty(right);
btrfs_item_key(right, &disk_key, 0);
@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(left);
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
path->slots[0] += old_left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
clean_tree_block(trans, root, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = left;
@ -2932,10 +2953,10 @@ again:
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
if (IS_ERR(right)) {
BUG_ON(1);
if (IS_ERR(right))
return PTR_ERR(right);
}
root_add_used(root, root->leafsize);
memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(right, right->start);
@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret);
if (ret)
goto err;
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
0, root->root_key.objectid, 0);
return ret;
root_sub_used(root, leaf->len);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
return 0;
}
/*
* delete the item at the leaf level in path. If that empties
@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
BUG_ON(ret);
}

View file

@ -34,6 +34,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
@ -663,6 +664,7 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_NR_RAID_TYPES 5
struct btrfs_block_group_item {
__le64 used;
@ -674,42 +676,46 @@ struct btrfs_space_info {
u64 flags;
u64 total_bytes; /* total bytes in the space */
u64 bytes_used; /* total bytes used on disk */
u64 bytes_used; /* total bytes used,
this does't take mirrors into account */
u64 bytes_pinned; /* total bytes pinned, will be freed when the
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
u64 bytes_readonly; /* total bytes that are read only */
u64 bytes_super; /* total bytes reserved for the super blocks */
u64 bytes_root; /* the number of bytes needed to commit a
transaction */
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 bytes_delalloc; /* number of bytes currently reserved for
delayed allocation */
u64 disk_used; /* total bytes used on disk */
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
this space */
int force_delalloc; /* make people start doing filemap_flush until
we're under a threshold */
struct list_head list;
/* for controlling how we free up space for allocations */
wait_queue_head_t allocate_wait;
wait_queue_head_t flush_wait;
int allocating_chunk;
int flushing;
/* for block groups in our same type */
struct list_head block_groups;
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
atomic_t caching_threads;
};
struct btrfs_block_rsv {
u64 size;
u64 reserved;
u64 freed[2];
struct btrfs_space_info *space_info;
struct list_head list;
spinlock_t lock;
atomic_t usage;
unsigned int priority:8;
unsigned int durable:1;
unsigned int refill_used:1;
unsigned int full:1;
};
/*
* free clusters are used to claim free space in relatively large chunks,
* allowing us to do less seeky writes. They are used for all metadata
@ -760,6 +766,7 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
@ -825,6 +832,22 @@ struct btrfs_fs_info {
/* logical->physical extent mapping */
struct btrfs_mapping_tree mapping_tree;
/* block reservation for extent, checksum and root tree */
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation;
u64 last_trans_committed;
@ -927,7 +950,6 @@ struct btrfs_fs_info {
struct btrfs_workers endio_meta_write_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
struct btrfs_workers enospc_workers;
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
@ -943,6 +965,7 @@ struct btrfs_fs_info {
int do_barriers;
int closing;
int log_root_recovering;
int enospc_unlink;
u64 total_pinned;
@ -1012,6 +1035,9 @@ struct btrfs_root {
struct completion kobj_unregister;
struct mutex objectid_mutex;
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
@ -1043,7 +1069,6 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
int clean_orphans;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
@ -1057,8 +1082,11 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t list_lock;
spinlock_t orphan_lock;
struct list_head orphan_list;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
spinlock_t inode_lock;
/* red-black tree that keeps track of in-memory inodes */
@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
u64 parent, u64 root_objectid, int level);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
struct btrfs_block_group_cache *group);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items);
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
struct inode *inode);
void btrfs_orphan_release_metadata(struct inode *inode);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, int *retries);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index);
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 logical_offset, u32 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_writepages(struct address_space *mapping,
@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
#endif

View file

@ -318,107 +318,6 @@ out:
return ret;
}
/*
* helper function to lookup reference count and flags of extent.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
* node may also store the extent flags to set. This way you can check
* to see what the reference count and extent flags would be if all of
* the delayed refs are not processed.
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
struct extent_buffer *leaf;
struct btrfs_key key;
u32 item_size;
u64 num_refs;
u64 extent_flags;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
delayed_refs = &trans->transaction->delayed_refs;
again:
ret = btrfs_search_slot(trans, root->fs_info->extent_root,
&key, path, 0, 0);
if (ret < 0)
goto out;
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
if (item_size >= sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
struct btrfs_extent_item_v0 *ei0;
BUG_ON(item_size != sizeof(*ei0));
ei0 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_item_v0);
num_refs = btrfs_extent_refs_v0(leaf, ei0);
/* FIXME: this isn't correct for data */
extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
BUG();
#endif
}
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
spin_lock(&delayed_refs->lock);
ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
if (ref) {
head = btrfs_delayed_node_to_head(ref);
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&ref->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(root->fs_info->extent_root, path);
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(ref);
goto again;
}
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += ref->ref_mod;
mutex_unlock(&head->mutex);
}
WARN_ON(num_refs == 0);
if (refs)
*refs = num_refs;
if (flags)
*flags = extent_flags;
out:
spin_unlock(&delayed_refs->lock);
btrfs_free_path(path);
return ret;
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same

View file

@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 orig_parent,
u64 parent, u64 orig_ref_root, u64 ref_root,

View file

@ -74,6 +74,11 @@ struct async_submit_bio {
int rw;
int mirror_num;
unsigned long bio_flags;
/*
* bio_offset is optional, can be used if the pages in the bio
* can't tell us where in the file the bio should go
*/
u64 bio_offset;
struct btrfs_work work;
};
@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work)
async = container_of(work, struct async_submit_bio, work);
fs_info = BTRFS_I(async->inode)->root->fs_info;
async->submit_bio_start(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags);
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_done(struct btrfs_work *work)
@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
async->submit_bio_done(async->inode, async->rw, async->bio,
async->mirror_num, async->bio_flags);
async->mirror_num, async->bio_flags,
async->bio_offset);
}
static void run_one_async_free(struct btrfs_work *work)
@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work)
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done)
{
@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->work.flags = 0;
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
atomic_inc(&fs_info->nr_async_submits);
@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio)
static int __btree_submit_bio_start(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags)
unsigned long bio_flags,
u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
}
static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
}
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
int ret;
@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
*/
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num, 0,
bio_offset,
__btree_submit_bio_start,
__btree_submit_bio_done);
}
@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->ref_cows = 0;
root->track_dirty = 0;
root->in_radix = 0;
root->clean_orphans = 0;
root->orphan_item_inserted = 0;
root->orphan_cleanup_state = 0;
root->fs_info = fs_info;
root->objectid = objectid;
@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->name = NULL;
root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
root->block_rsv = NULL;
root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->node_lock);
spin_lock_init(&root->list_lock);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
return 0;
}
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct extent_buffer *eb;
struct btrfs_root *log_root_tree = fs_info->log_root_tree;
u64 start = 0;
u64 end = 0;
int ret;
if (!log_root_tree)
return 0;
while (1) {
ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
eb = fs_info->log_root_tree->node;
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) != 0);
ret = btrfs_free_reserved_extent(fs_info->tree_root,
eb->start, eb->len);
BUG_ON(ret);
free_extent_buffer(eb);
kfree(fs_info->log_root_tree);
fs_info->log_root_tree = NULL;
return 0;
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
@ -1191,19 +1172,23 @@ again:
if (root)
return root;
ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
if (ret == 0)
ret = -ENOENT;
if (ret < 0)
return ERR_PTR(ret);
root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
WARN_ON(btrfs_root_refs(&root->root_item) == 0);
set_anon_super(&root->anon_super, NULL);
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
goto fail;
}
ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
if (ret < 0)
goto fail;
if (ret == 0)
root->orphan_item_inserted = 1;
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret)
goto fail;
@ -1212,10 +1197,9 @@ again:
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
if (ret == 0) {
if (ret == 0)
root->in_radix = 1;
root->clean_orphans = 1;
}
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
if (ret) {
@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
do {
smp_mb();
if (root->fs_info->closing)
break;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg)
if (freezing(current)) {
refrigerator();
} else {
smp_mb();
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule();
if (!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
u64 transid;
unsigned long now;
unsigned long delay;
int ret;
do {
smp_mb();
if (root->fs_info->closing)
break;
delay = HZ * 30;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->new_trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->new_trans_lock);
goto sleep;
}
now = get_seconds();
if (now < cur->start_time || now - cur->start_time < 30) {
mutex_unlock(&root->fs_info->trans_mutex);
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->new_trans_lock);
delay = HZ * 5;
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
transid = cur->transid;
spin_unlock(&root->fs_info->new_trans_lock);
trans = btrfs_join_transaction(root, 1);
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
} else {
btrfs_end_transaction(trans, root);
}
sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@ -1530,10 +1512,10 @@ sleep:
if (freezing(current)) {
refrigerator();
} else {
if (root->fs_info->closing)
break;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(delay);
if (!kthread_should_stop() &&
!btrfs_transaction_blocked(root->fs_info))
schedule_timeout(delay);
__set_current_state(TASK_RUNNING);
}
} while (!kthread_should_stop());
@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
min_t(u64, fs_devices->num_devices,
fs_info->thread_pool_size),
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->enospc_workers, "enospc",
fs_info->thread_pool_size,
&fs_info->generic_worker);
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the
@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->enospc_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,
csum_root->track_dirty = 1;
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
ret = btrfs_read_block_groups(extent_root);
if (ret) {
printk(KERN_ERR "Failed to read block groups: %d\n", ret);
goto fail_block_groups;
}
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
BUG_ON(ret);
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
BUG_ON(ret);
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
@ -2040,7 +2029,6 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode);
@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root)
down_write(&root->fs_info->cleanup_work_sem);
up_write(&root->fs_info->cleanup_work_sem);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
trans = btrfs_start_transaction(root, 1);
trans = btrfs_join_transaction(root, 1);
btrfs_commit_transaction(trans, root);
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
fs_info->closing = 2;
smp_mb();
@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->enospc_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);

View file

@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
int rw, struct bio *bio, int mirror_num,
unsigned long bio_flags,
unsigned long bio_flags, u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,

File diff suppressed because it is too large Load diff

View file

@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
return state;
}
static void free_extent_state(struct extent_state *state)
void free_extent_state(struct extent_state *state)
{
if (!state)
return;
@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
}
static int set_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
return tree->ops->set_bit_hook(tree->mapping->host,
state->start, state->end,
state->state, bits);
state, bits);
}
return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
int bits)
int *bits)
{
struct rb_node *node;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
if (end < start) {
@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
if (ret)
return ret;
if (bits & EXTENT_DIRTY)
if (bits_to_set & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
state->state |= bits;
state->state |= bits_to_set;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state, int bits, int wake,
int delete)
struct extent_state *state,
int *bits, int wake)
{
int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
if (delete || state->state == 0) {
if (state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int set = 0;
int clear = 0;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
@ -580,8 +581,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
set |= clear_state_bit(tree, state, bits, wake,
delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@ -602,7 +602,7 @@ hit_next:
if (wake)
wake_up(&state->wq);
set |= clear_state_bit(tree, prealloc, bits, wake, delete);
set |= clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
@ -613,7 +613,7 @@ hit_next:
else
next_node = NULL;
set |= clear_state_bit(tree, state, bits, wake, delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@ -706,19 +706,19 @@ out:
static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int bits)
int *bits)
{
int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
ret = set_state_cb(tree, state, bits);
if (ret)
return ret;
if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits;
state->state |= bits_to_set;
return 0;
}
@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state,
* [start, end] is inclusive This takes the tree lock.
*/
static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state,
gfp_t mask)
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
@ -778,7 +778,7 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
err = insert_state(tree, prealloc, start, end, bits);
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
@ -802,7 +802,7 @@ hit_next:
goto out;
}
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
@ -852,7 +852,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
cache_state(state, cached_state);
@ -877,7 +877,7 @@ hit_next:
else
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
&bits);
BUG_ON(err == -EEXIST);
if (err) {
prealloc = NULL;
@ -903,7 +903,7 @@ hit_next:
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
err = set_state_bits(tree, prealloc, bits);
err = set_state_bits(tree, prealloc, &bits);
if (err) {
prealloc = NULL;
goto out;
@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0,
NULL, mask);
EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
if (op & EXTENT_CLEAR_DELALLOC)
clear_bits |= EXTENT_DELALLOC;
if (op & EXTENT_CLEAR_ACCOUNTING)
clear_bits |= EXTENT_DO_ACCOUNTING;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
if (tree->ops && tree->ops->submit_bio_hook)
tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags);
mirror_num, bio_flags, start);
else
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
struct btrfs_ordered_extent *ordered;
int ret;
int nr = 0;
size_t page_offset = 0;
@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
set_page_extent_mapped(page);
end = page_end;
lock_extent(tree, start, end, GFP_NOFS);
while (1) {
lock_extent(tree, start, end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode, start);
if (!ordered)
break;
unlock_extent(tree, start, end, GFP_NOFS);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
char *userpage;

View file

@ -16,7 +16,9 @@
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
@ -47,7 +49,7 @@ struct extent_state;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
unsigned long bio_flags);
unsigned long bio_flags, u64 bio_offset);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
@ -69,10 +71,10 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
int *bits);
int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long bits);
int *bits);
int (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned long bits);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int filled, struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,

View file

@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
u32 sum;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
u64 offset;
u64 offset = 0;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
WARN_ON(bio->bi_vcnt <= 0);
disk_bytenr = (u64)bio->bi_sector << 9;
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
if (ret == 0)
goto found;
@ -238,6 +242,7 @@ found:
else
set_state_private(io_tree, offset, sum);
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
bio_index++;
bvec++;
}
@ -245,6 +250,18 @@ found:
return 0;
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 offset, u32 *dst)
{
return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list)
{
@ -657,6 +674,9 @@ again:
goto found;
}
ret = PTR_ERR(item);
if (ret != -EFBIG && ret != -ENOENT)
goto fail_unlock;
if (ret == -EFBIG) {
u32 item_size;
/* we found one, but it isn't big enough yet */

View file

@ -46,32 +46,42 @@
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
int write_bytes,
struct page **prepared_pages,
const char __user *buf)
struct iov_iter *i)
{
long page_fault = 0;
int i;
size_t copied;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[i];
fault_in_pages_readable(buf, count);
struct page *page = prepared_pages[pg];
again:
if (unlikely(iov_iter_fault_in_readable(i, count)))
return -EFAULT;
/* Copy data from userspace to the current page */
kmap(page);
page_fault = __copy_from_user(page_address(page) + offset,
buf, count);
copied = iov_iter_copy_from_user(page, i, offset, count);
/* Flush processor's dcache for this page */
flush_dcache_page(page);
kunmap(page);
buf += count;
write_bytes -= count;
iov_iter_advance(i, copied);
write_bytes -= copied;
if (page_fault)
break;
if (unlikely(copied == 0)) {
count = min_t(size_t, PAGE_CACHE_SIZE - offset,
iov_iter_single_seg_count(i));
goto again;
}
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
offset += copied;
} else {
pg++;
offset = 0;
}
}
return page_fault ? -EFAULT : 0;
return 0;
}
/*
@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
NULL);
if (err)
return err;
BUG_ON(err);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
* at this time.
*/
}
return err;
return 0;
}
/*
@ -823,45 +832,46 @@ again:
return 0;
}
static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
loff_t pos;
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
loff_t start_pos;
ssize_t num_written = 0;
ssize_t err = 0;
size_t count;
size_t ocount;
int ret = 0;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
int nrptrs;
struct page *pinned[2];
unsigned long first_index;
unsigned long last_index;
int will_write;
int buffered = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
PAGE_CACHE_SIZE / (sizeof(struct page *)));
pinned[0] = NULL;
pinned[1] = NULL;
pos = *ppos;
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* do the reserve before the mutex lock in case we have to do some
* flushing. We wouldn't deadlock, but this is more polite.
*/
err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (err)
goto out_nolock;
mutex_lock(&inode->i_mutex);
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
if (err)
goto out;
count = ocount;
current->backing_dev_info = inode->i_mapping->backing_dev_info;
err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
goto out;
file_update_time(file);
BTRFS_I(inode)->sequence++;
if (unlikely(file->f_flags & O_DIRECT)) {
num_written = generic_file_direct_write(iocb, iov, &nr_segs,
pos, ppos, count,
ocount);
/*
* the generic O_DIRECT will update in-memory i_size after the
* DIOs are done. But our endio handlers that update the on
* disk i_size never update past the in memory i_size. So we
* need one more update here to catch any additions to the
* file
*/
if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
mark_inode_dirty(inode);
}
if (num_written < 0) {
ret = num_written;
num_written = 0;
goto out;
} else if (num_written == count) {
/* pick up pos changes done by the generic code */
pos = *ppos;
goto out;
}
/*
* We are going to do buffered for the rest of the range, so we
* need to make sure to invalidate the buffered pages when we're
* done.
*/
buffered = 1;
pos += num_written;
}
iov_iter_init(&i, iov, nr_segs, count, num_written);
nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
(sizeof(struct page *)));
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
/* generic_write_checks can change our pos */
start_pos = pos;
BTRFS_I(inode)->sequence++;
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + count) >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
/*
* there are lots of better ways to do this, but this code
@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
unlock_page(pinned[0]);
}
}
if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
if (!PageUptodate(pinned[1])) {
ret = btrfs_readpage(NULL, pinned[1]);
@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
}
}
while (count > 0) {
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(count, nrptrs *
(size_t)PAGE_CACHE_SIZE -
size_t write_bytes = min(iov_iter_count(&i),
nrptrs * (size_t)PAGE_CACHE_SIZE -
offset);
size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
ret = btrfs_check_data_free_space(root, inode, write_bytes);
ret = btrfs_delalloc_reserve_space(inode, write_bytes);
if (ret)
goto out;
@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_drop_pages(pages, num_pages);
goto out;
write_bytes, pages, &i);
if (ret == 0) {
dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
}
ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
btrfs_throttle(root);
}
buf += write_bytes;
count -= write_bytes;
pos += write_bytes;
num_written += write_bytes;
@ -976,9 +1016,7 @@ out:
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
out_nolock:
kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);
@ -1008,7 +1046,7 @@ out_nolock:
num_written = err;
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
if (ret == 0) {
@ -1023,7 +1061,7 @@ out_nolock:
btrfs_end_transaction(trans, root);
}
}
if (file->f_flags & O_DIRECT) {
if (file->f_flags & O_DIRECT && buffered) {
invalidate_mapping_pages(inode->i_mapping,
start_pos >> PAGE_CACHE_SHIFT,
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (file && file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.write = btrfs_file_write,
.aio_write = btrfs_file_aio_write,
.mmap = btrfs_file_mmap,
.open = generic_file_open,
.release = btrfs_release_file,

View file

@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
return 0;
}
struct btrfs_inode_ref *
btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, int mod)
{
struct btrfs_key key;
struct btrfs_inode_ref *ref;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
int ret;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = ref_objectid;
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret > 0)
return NULL;
if (!find_name_in_backref(path, name, name_len, &ref))
return NULL;
return ref;
}
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,

File diff suppressed because it is too large Load diff

View file

@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root,
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
0, &objectid);
if (ret)
return ret;
/*
* 1 - inode item
* 2 - refs
* 1 - root item
* 2 - dir items
*/
ret = btrfs_reserve_metadata_space(root, 6);
if (ret)
return ret;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
0, &objectid);
if (ret)
goto fail;
trans = btrfs_start_transaction(root, 6);
if (IS_ERR(trans))
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
@ -345,13 +341,10 @@ fail:
err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
btrfs_unreserve_metadata_space(root, 6);
return ret;
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen)
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
if (!root->ref_cows)
return -EINVAL;
/*
* 1 - inode item
* 2 - refs
* 1 - root item
* 2 - dir items
*/
ret = btrfs_reserve_metadata_space(root, 6);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
return -ENOMEM;
btrfs_init_block_rsv(&pending_snapshot->block_rsv);
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto fail;
}
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
BUG_ON(ret);
ret = pending_snapshot->error;
if (ret)
goto fail;
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot) {
ret = -ENOMEM;
btrfs_unreserve_metadata_space(root, 6);
goto fail;
}
pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
if (!pending_snapshot->name) {
ret = -ENOMEM;
kfree(pending_snapshot);
btrfs_unreserve_metadata_space(root, 6);
goto fail;
}
memcpy(pending_snapshot->name, name, namelen);
pending_snapshot->name[namelen] = '\0';
pending_snapshot->dentry = dentry;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
pending_snapshot->root = root;
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
btrfs_unreserve_metadata_space(root, 6);
btrfs_orphan_cleanup(pending_snapshot->snap);
inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
if (IS_ERR(inode)) {
@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
d_instantiate(dentry, inode);
ret = 0;
fail:
kfree(pending_snapshot);
return ret;
}
@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
error = create_snapshot(snap_src, dentry,
name, namelen);
error = create_snapshot(snap_src, dentry);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen);
@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file,
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = 1;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
if (ret) {
ret = -ENOSPC;
break;
}
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
ret = -ENOSPC;
break;
}
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto err_unlock;
again:
if (inode->i_size == 0 ||
i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
@ -622,8 +598,10 @@ again:
}
page = grab_cache_page(inode->i_mapping, i);
if (!page)
if (!page) {
ret = -ENOMEM;
goto err_reservations;
}
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
@ -631,6 +609,7 @@ again:
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
ret = -EIO;
goto err_reservations;
}
}
@ -644,8 +623,7 @@ again:
wait_on_page_writeback(page);
if (PageDirty(page)) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto loop_unlock;
}
@ -683,7 +661,6 @@ loop_unlock:
page_cache_release(page);
mutex_unlock(&inode->i_mutex);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
i++;
}
@ -713,9 +690,9 @@ loop_unlock:
return 0;
err_reservations:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
err_unlock:
mutex_unlock(&inode->i_mutex);
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
return ret;
}
@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
device->name, (unsigned long long)new_size);
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
} else {
@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out_up_write;
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
ret = btrfs_unlink_subvol(trans, root, dir,
dest->root_key.objectid,
dentry->d_name.name,
@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
dest->root_item.drop_level = 0;
btrfs_set_root_refs(&dest->root_item, 0);
ret = btrfs_insert_orphan_item(trans,
root->fs_info->tree_root,
dest->root_key.objectid);
BUG_ON(ret);
if (!xchg(&dest->orphan_item_inserted, 1)) {
ret = btrfs_insert_orphan_item(trans,
root->fs_info->tree_root,
dest->root_key.objectid);
BUG_ON(ret);
}
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EPERM;
goto out;
}
btrfs_defrag_root(root, 0);
btrfs_defrag_root(root->fs_info->extent_root, 0);
ret = btrfs_defrag_root(root, 0);
if (ret)
goto out;
ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
break;
case S_IFREG:
if (!(file->f_mode & FMODE_WRITE)) {
@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
/* the rest are all set to zero by kzalloc */
range->len = (u64)-1;
}
btrfs_defrag_file(file, range);
ret = btrfs_defrag_file(file, range);
kfree(range);
break;
default:
ret = -EINVAL;
}
out:
mnt_drop_write(file->f_path.mnt);
@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_wait_ordered_range(src, off, off+len);
}
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
/* punch hole in destination first */
btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
/* clone data */
key.objectid = src->i_ino;
key.type = BTRFS_EXTENT_DATA_KEY;
@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* note the key will change type as we walk through the
* tree.
*/
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
new_key.objectid = inode->i_ino;
new_key.offset = key.offset + destoff - off;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
if (off > key.offset) {
datao += off - key.offset;
datal -= off - key.offset;
}
if (key.offset + datal > off + len)
datal = off + len - key.offset;
ret = btrfs_drop_extents(trans, inode,
new_key.offset,
new_key.offset + datal,
&hint_byte, 1);
BUG_ON(ret);
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret)
goto out;
BUG_ON(ret);
leaf = path->nodes[0];
slot = path->slots[0];
@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
if (off > key.offset) {
datao += off - key.offset;
datal -= off - key.offset;
}
if (key.offset + datal > off + len)
datal = off + len - key.offset;
/* disko == 0 means it's a hole */
if (!disko)
datao = 0;
@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (comp && (skip || trim)) {
ret = -EINVAL;
btrfs_end_transaction(trans, root);
goto out;
}
size -= skip + trim;
datal -= skip + trim;
ret = btrfs_drop_extents(trans, inode,
new_key.offset,
new_key.offset + datal,
&hint_byte, 1);
BUG_ON(ret);
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret)
goto out;
BUG_ON(ret);
if (skip) {
u32 start =
@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
}
btrfs_mark_buffer_dirty(leaf);
}
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
if (new_key.offset + datal > inode->i_size)
btrfs_i_size_write(inode,
new_key.offset + datal);
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
}
next:
btrfs_release_path(root, path);
key.offset++;
@ -1717,17 +1727,7 @@ next:
ret = 0;
out:
btrfs_release_path(root, path);
if (ret == 0) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
if (destoff + olen > inode->i_size)
btrfs_i_size_write(inode, destoff + olen);
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
}
btrfs_end_transaction(trans, root);
unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
if (ret)
vmtruncate(inode, 0);
out_unlock:
mutex_unlock(&src->i_mutex);
mutex_unlock(&inode->i_mutex);

View file

@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
return 1;
}
static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
u64 len)
{
if (file_offset + len <= entry->file_offset ||
entry->file_offset + entry->len <= file_offset)
return 0;
return 1;
}
/*
* look find the first ordered struct that has this offset, otherwise
* the first one less than this offset
@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
int type, int dio)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
if (dio)
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
return 0;
}
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 0);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 1);
}
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished. If the list covers more than one
@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_lock(&BTRFS_I(inode)->accounting_lock);
WARN_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
inode, 1);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
* start IO on any dirty ones so the wait doesn't stall waiting
* for pdflush to find them
*/
filemap_fdatawrite_range(inode->i_mapping, start, end);
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->i_mapping, start, end);
if (wait) {
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
@ -588,6 +609,47 @@ out:
return entry;
}
/* Since the DIO code tries to lock a wide area we need to look for any ordered
* extents that exist in the range, rather than just the start of the range.
*/
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
node = tree_search(tree, file_offset);
if (!node) {
node = tree_search(tree, file_offset + len);
if (!node)
goto out;
}
while (1) {
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (range_overlaps(entry, file_offset, len))
break;
if (entry->file_offset >= file_offset + len) {
entry = NULL;
break;
}
entry = NULL;
node = rb_next(node);
if (!node)
break;
}
out:
if (entry)
atomic_inc(&entry->refs);
spin_unlock(&tree->lock);
return entry;
}
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found

View file

@ -72,6 +72,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int tyep);
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 file_offset,
u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);

File diff suppressed because it is too large Load diff

View file

@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key root_key;
struct btrfs_root *root;
int err = 0;
int ret;
@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = 0;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
while (1) {
ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0) {
@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
ret = btrfs_find_dead_roots(tree_root, key.offset);
if (ret) {
root_key.objectid = key.offset;
key.offset++;
root = btrfs_read_fs_root_no_name(tree_root->fs_info,
&root_key);
if (!IS_ERR(root))
continue;
ret = PTR_ERR(root);
if (ret != -ENOENT) {
err = ret;
break;
}
key.offset++;
ret = btrfs_find_dead_roots(tree_root, root_key.objectid);
if (ret) {
err = ret;
break;
}
}
btrfs_free_path(path);

View file

@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
ret = btrfs_commit_transaction(trans, root);
return ret;
}
@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
return -EINVAL;
/* recover relocation */
ret = btrfs_recover_relocation(root);
ret = btrfs_cleanup_fs_roots(root->fs_info);
WARN_ON(ret);
ret = btrfs_cleanup_fs_roots(root->fs_info);
/* recover relocation */
ret = btrfs_recover_relocation(root);
WARN_ON(ret);
sb->s_flags &= ~MS_RDONLY;
@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 data_used = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
BTRFS_BLOCK_GROUP_RAID10|
BTRFS_BLOCK_GROUP_RAID1)) {
total_used += found->bytes_used;
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
data_used += found->bytes_used;
else
data_used += found->total_bytes;
}
total_used += found->bytes_used;
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
data_used += found->bytes_used;
else
data_used += found->total_bytes;
}
list_for_each_entry_rcu(found, head, list)
total_used += found->disk_used;
rcu_read_unlock();
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
buf->f_bavail = buf->f_blocks - (data_used >> bits);
buf->f_bavail = buf->f_bfree;
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;

View file

@ -165,54 +165,89 @@ enum btrfs_trans_type {
TRANS_USERSPACE,
};
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
int num_blocks, int type)
static int may_wait_transaction(struct btrfs_root *root, int type)
{
struct btrfs_trans_handle *h =
kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
int ret;
mutex_lock(&root->fs_info->trans_mutex);
if (!root->fs_info->log_root_recovering &&
((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
type == TRANS_USERSPACE))
return 1;
return 0;
}
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
u64 num_items, int type)
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
int retries = 0;
int ret;
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
return ERR_PTR(-ENOMEM);
mutex_lock(&root->fs_info->trans_mutex);
if (may_wait_transaction(root, type))
wait_current_trans(root);
ret = join_transaction(root);
BUG_ON(ret);
h->transid = root->fs_info->running_transaction->transid;
h->transaction = root->fs_info->running_transaction;
h->blocks_reserved = num_blocks;
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
mutex_unlock(&root->fs_info->trans_mutex);
h->transid = cur_trans->transid;
h->transaction = cur_trans;
h->blocks_used = 0;
h->block_group = 0;
h->alloc_exclude_nr = 0;
h->alloc_exclude_start = 0;
h->bytes_reserved = 0;
h->delayed_ref_updates = 0;
h->block_rsv = NULL;
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
btrfs_commit_transaction(h, root);
goto again;
}
if (num_items > 0) {
ret = btrfs_trans_reserve_metadata(h, root, num_items,
&retries);
if (ret == -EAGAIN) {
btrfs_commit_transaction(h, root);
goto again;
}
if (ret < 0) {
btrfs_end_transaction(h, root);
return ERR_PTR(ret);
}
}
mutex_lock(&root->fs_info->trans_mutex);
record_root_in_trans(h, root);
mutex_unlock(&root->fs_info->trans_mutex);
if (!current->journal_info && type != TRANS_USERSPACE)
current->journal_info = h;
root->fs_info->running_transaction->use_count++;
record_root_in_trans(h, root);
mutex_unlock(&root->fs_info->trans_mutex);
return h;
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_blocks)
int num_items)
{
return start_transaction(root, num_blocks, TRANS_START);
return start_transaction(root, num_items, TRANS_START);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks)
{
return start_transaction(root, num_blocks, TRANS_JOIN);
return start_transaction(root, 0, TRANS_JOIN);
}
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks)
{
return start_transaction(r, num_blocks, TRANS_USERSPACE);
return start_transaction(r, 0, TRANS_USERSPACE);
}
/* wait for a transaction commit to be fully complete */
@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root)
mutex_unlock(&root->fs_info->trans_mutex);
}
static int should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
int ret;
ret = btrfs_block_rsv_check(trans, root,
&root->fs_info->global_block_rsv, 0, 5);
return ret ? 1 : 0;
}
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
int updates;
if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
return 1;
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates)
btrfs_run_delayed_refs(trans, root, updates);
return should_end_transaction(trans, root);
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int throttle)
{
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
int count = 0;
@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
count++;
}
btrfs_trans_release_metadata(trans, root);
if (!root->fs_info->open_ioctl_trans &&
should_end_transaction(trans, root))
trans->transaction->blocked = 1;
if (cur_trans->blocked && !cur_trans->in_commit) {
if (throttle)
return btrfs_commit_transaction(trans, root);
else
wake_up_process(info->transaction_kthread);
}
mutex_lock(&info->trans_mutex);
cur_trans = info->running_transaction;
WARN_ON(cur_trans != trans->transaction);
WARN_ON(cur_trans != info->running_transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
btrfs_free_log(trans, root);
btrfs_update_reloc_root(trans, root);
btrfs_orphan_commit_root(trans, root);
if (root->commit_root != root->node) {
switch_commit_root(root);
@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
struct btrfs_fs_info *info = root->fs_info;
int ret;
struct btrfs_trans_handle *trans;
int ret;
unsigned long nr;
smp_mb();
if (root->defrag_running)
if (xchg(&root->defrag_running, 1))
return 0;
trans = btrfs_start_transaction(root, 1);
while (1) {
root->defrag_running = 1;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_defrag_leaves(trans, root, cacheonly);
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(info->tree_root, nr);
cond_resched();
trans = btrfs_start_transaction(root, 1);
if (root->fs_info->closing || ret != -EAGAIN)
break;
}
root->defrag_running = 0;
smp_mb();
btrfs_end_transaction(trans, root);
return 0;
return ret;
}
#if 0
@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root;
struct inode *parent_inode;
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
u64 objectid;
int namelen;
int retries = 0;
u64 to_reserve = 0;
u64 index = 0;
parent_inode = pending->dentry->d_parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
u64 objectid;
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
if (!new_root_item) {
ret = -ENOMEM;
pending->error = -ENOMEM;
goto fail;
}
ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
if (ret)
if (ret) {
pending->error = ret;
goto fail;
}
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
to_reserve, &retries);
if (ret) {
pending->error = ret;
goto fail;
}
}
key.objectid = objectid;
/* record when the snapshot was created in key.offset */
key.offset = trans->transid;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY;
memcpy(&pending->root_key, &key, sizeof(key));
pending->root_key.offset = (u64)-1;
trans->block_rsv = &pending->block_rsv;
dentry = pending->dentry;
parent_inode = dentry->d_parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root);
/*
* insert the directory item
*/
namelen = strlen(pending->name);
ret = btrfs_set_inode_index(parent_inode, &index);
BUG_ON(ret);
ret = btrfs_insert_dir_item(trans, parent_root,
pending->name, namelen,
parent_inode->i_ino,
&pending->root_key, BTRFS_FT_DIR, index);
dentry->d_name.name, dentry->d_name.len,
parent_inode->i_ino, &key,
BTRFS_FT_DIR, index);
BUG_ON(ret);
btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
free_extent_buffer(old);
btrfs_set_root_node(new_root_item, tmp);
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
new_root_item);
BUG_ON(ret);
/* record when the snapshot was created in key.offset */
key.offset = trans->transid;
ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
pending->root_key.objectid,
parent_root->root_key.objectid,
parent_inode->i_ino, index, pending->name,
namelen);
BUG_ON(ret);
/*
* insert root back/forward references
*/
ret = btrfs_add_root_ref(trans, tree_root, objectid,
parent_root->root_key.objectid,
parent_inode->i_ino, index,
dentry->d_name.name, dentry->d_name.len);
BUG_ON(ret);
key.offset = (u64)-1;
pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
BUG_ON(IS_ERR(pending->snap));
btrfs_reloc_post_snapshot(trans, pending);
btrfs_orphan_post_snapshot(trans, pending);
fail:
kfree(new_root_item);
return ret;
btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
return 0;
}
/*
@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
return ret;
}
int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
int ret = 0;
spin_lock(&info->new_trans_lock);
if (info->running_transaction)
ret = info->running_transaction->blocked;
spin_unlock(&info->new_trans_lock);
return ret;
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = btrfs_run_delayed_refs(trans, root, 0);
BUG_ON(ret);
btrfs_trans_release_metadata(trans, root);
cur_trans = trans->transaction;
/*
* set the flushing flag so procs in this transaction have to
@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
snap_pending = 1;
WARN_ON(cur_trans != trans->transaction);
prepare_to_wait(&cur_trans->writer_wait, &wait,
TASK_UNINTERRUPTIBLE);
if (cur_trans->num_writers > 1)
timeout = MAX_SCHEDULE_TIMEOUT;
else if (should_grow)
@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
*/
btrfs_run_ordered_operations(root, 1);
prepare_to_wait(&cur_trans->writer_wait, &wait,
TASK_UNINTERRUPTIBLE);
smp_mb();
if (cur_trans->num_writers > 1 || should_grow)
schedule_timeout(timeout);
@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
btrfs_drop_snapshot(root, 0);
btrfs_drop_snapshot(root, NULL, 0);
else
btrfs_drop_snapshot(root, 1);
btrfs_drop_snapshot(root, NULL, 1);
}
return 0;
}

View file

@ -45,20 +45,23 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 block_group;
u64 bytes_reserved;
unsigned long blocks_reserved;
unsigned long blocks_used;
struct btrfs_transaction *transaction;
u64 block_group;
u64 alloc_exclude_start;
u64 alloc_exclude_nr;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
};
struct btrfs_pending_snapshot {
struct dentry *dentry;
struct btrfs_root *root;
char *name;
struct btrfs_key root_key;
struct btrfs_root *snap;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
/* extra metadata reseration for relocation */
int error;
struct list_head list;
};
@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_blocks);
int num_items);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
int num_blocks);
int num_blocks);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
int num_blocks);
int num_blocks);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_throttle(struct btrfs_root *root);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
#endif

View file

@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->nodes[1], 0,
cache_only, &last_ret,
&root->defrag_progress);
WARN_ON(ret && ret != -EAGAIN);
if (ret) {
WARN_ON(ret == -EAGAIN);
goto out;
}
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;
}
btrfs_release_path(root, path);
out:
if (path)
btrfs_free_path(path);

View file

@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
int ret;
int err = 0;
mutex_lock(&root->log_mutex);
if (root->log_root) {
@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->tree_log_mutex);
if (!root->fs_info->log_root_tree) {
ret = btrfs_init_log_root_tree(trans, root->fs_info);
BUG_ON(ret);
if (ret)
err = ret;
}
if (!root->log_root) {
if (err == 0 && !root->log_root) {
ret = btrfs_add_log_tree(trans, root);
BUG_ON(ret);
if (ret)
err = ret;
}
mutex_unlock(&root->fs_info->tree_log_mutex);
root->log_batch++;
atomic_inc(&root->log_writers);
mutex_unlock(&root->log_mutex);
return 0;
return err;
}
/*
@ -376,7 +379,7 @@ insert:
BUG_ON(ret);
}
} else if (ret) {
BUG();
return ret;
}
dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
wc->process_func(root, next, wc, ptr_gen);
if (*level == 1) {
wc->process_func(root, next, wc, ptr_gen);
path->slots[*level]++;
if (wc->free) {
btrfs_read_buffer(next, ptr_gen);
@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
if (path->nodes[*level] == root->node)
parent = path->nodes[*level];
else
parent = path->nodes[*level + 1];
bytenr = path->nodes[*level]->start;
blocksize = btrfs_level_size(root, *level);
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
if (wc->free) {
next = path->nodes[*level];
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
BUG_ON(ret);
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level += 1;
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
cond_resched();
return 0;
@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
struct extent_buffer *node;
node = path->nodes[i];
path->slots[i]++;
@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&log_root_tree->log_mutex);
ret = update_log_root(trans, log);
BUG_ON(ret);
mutex_lock(&log_root_tree->log_mutex);
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
wake_up(&log_root_tree->log_writer_wait);
}
if (ret) {
BUG_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
}
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
@ -2129,15 +2112,10 @@ out:
return 0;
}
/*
* free all the extents used by the tree log. This should be called
* at commit time of the full transaction
*/
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
static void free_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *log)
{
int ret;
struct btrfs_root *log;
struct key;
u64 start;
u64 end;
struct walk_control wc = {
@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
.process_func = process_one_buffer
};
if (!root->log_root || root->fs_info->log_root_recovering)
return 0;
log = root->log_root;
ret = walk_log_tree(trans, log, &wc);
BUG_ON(ret);
@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
if (log->log_transid > 0) {
ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
&log->root_key);
BUG_ON(ret);
}
root->log_root = NULL;
free_extent_buffer(log->node);
kfree(log);
}
/*
* free all the extents used by the tree log. This should be called
* at commit time of the full transaction
*/
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
{
if (root->log_root) {
free_log_tree(trans, root->log_root);
root->log_root = NULL;
}
return 0;
}
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
if (fs_info->log_root_tree) {
free_log_tree(trans, fs_info->log_root_tree);
fs_info->log_root_tree = NULL;
}
return 0;
}
@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_dir_item *di;
struct btrfs_path *path;
int ret;
int err = 0;
int bytes_del = 0;
if (BTRFS_I(dir)->logged_trans < trans->transid)
@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
name, name_len, -1);
if (di && !IS_ERR(di)) {
if (IS_ERR(di)) {
err = PTR_ERR(di);
goto fail;
}
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
BUG_ON(ret);
@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
btrfs_release_path(log, path);
di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino,
index, name, name_len, -1);
if (di && !IS_ERR(di)) {
if (IS_ERR(di)) {
err = PTR_ERR(di);
goto fail;
}
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
BUG_ON(ret);
@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
btrfs_release_path(log, path);
ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
if (ret < 0) {
err = ret;
goto fail;
}
if (ret == 0) {
struct btrfs_inode_item *item;
u64 i_size;
@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
ret = 0;
btrfs_release_path(log, path);
}
fail:
btrfs_free_path(path);
mutex_unlock(&BTRFS_I(dir)->log_mutex);
if (ret == -ENOSPC) {
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 0;
}
btrfs_end_log_trans(root);
return 0;
@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
dirid, &index);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
if (ret == -ENOSPC) {
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 0;
}
btrfs_end_log_trans(root);
return ret;
@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
else
key.type = BTRFS_DIR_LOG_INDEX_KEY;
ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
BUG_ON(ret);
if (ret)
return ret;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_dir_log_item);
@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src;
int err = 0;
int ret;
int i;
int nritems;
@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, log, dst_path,
path->nodes[0], path->slots[0],
&tmp);
if (ret) {
err = ret;
goto done;
}
}
}
btrfs_release_path(root, path);
@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
goto done;
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
BUG_ON(ret);
if (ret) {
err = ret;
goto done;
}
}
path->slots[0] = nritems;
@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, log, dst_path,
path->nodes[0], path->slots[0],
&tmp);
BUG_ON(ret);
last_offset = tmp.offset;
if (ret)
err = ret;
else
last_offset = tmp.offset;
goto done;
}
}
done:
*last_offset_ret = last_offset;
btrfs_release_path(root, path);
btrfs_release_path(log, dst_path);
/* insert the log range keys to indicate where the log is valid */
ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino,
first_offset, last_offset);
BUG_ON(ret);
return 0;
if (err == 0) {
*last_offset_ret = last_offset;
/*
* insert the log range keys to indicate where the log
* is valid
*/
ret = insert_dir_log_key(trans, log, path, key_type,
inode->i_ino, first_offset,
last_offset);
if (ret)
err = ret;
}
return err;
}
/*
@ -2501,7 +2529,8 @@ again:
ret = log_dir_items(trans, root, inode, path,
dst_path, key_type, min_key,
&max_key);
BUG_ON(ret);
if (ret)
return ret;
if (max_key == (u64)-1)
break;
min_key = max_key + 1;
@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
if (ret != 1)
BUG_ON(ret == 0);
if (ret < 0)
break;
if (path->slots[0] == 0)
@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
btrfs_release_path(log, path);
}
btrfs_release_path(log, path);
return 0;
return ret;
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
}
ret = btrfs_insert_empty_items(trans, log, dst_path,
ins_keys, ins_sizes, nr);
BUG_ON(ret);
if (ret) {
kfree(ins_data);
return ret;
}
for (i = 0; i < nr; i++, dst_path->slots[0]++) {
dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* we have to do this after the loop above to avoid changing the
* log tree while trying to change the log tree.
*/
ret = 0;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
ret = btrfs_csum_file_blocks(trans, log, sums);
BUG_ON(ret);
if (!ret)
ret = btrfs_csum_file_blocks(trans, log, sums);
list_del(&sums->list);
kfree(sums);
}
return 0;
return ret;
}
/* log a single inode in the tree log.
@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
u32 size;
int err = 0;
int ret;
int nritems;
int ins_start_slot = 0;
@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
}
BUG_ON(ret);
if (ret) {
err = ret;
goto out_unlock;
}
path->keep_locks = 1;
while (1) {
@ -2768,7 +2805,10 @@ again:
ret = copy_items(trans, log, dst_path, src, ins_start_slot,
ins_nr, inode_only);
BUG_ON(ret);
if (ret) {
err = ret;
goto out_unlock;
}
ins_nr = 1;
ins_start_slot = path->slots[0];
next_slot:
@ -2784,7 +2824,10 @@ next_slot:
ret = copy_items(trans, log, dst_path, src,
ins_start_slot,
ins_nr, inode_only);
BUG_ON(ret);
if (ret) {
err = ret;
goto out_unlock;
}
ins_nr = 0;
}
btrfs_release_path(root, path);
@ -2802,7 +2845,10 @@ next_slot:
ret = copy_items(trans, log, dst_path, src,
ins_start_slot,
ins_nr, inode_only);
BUG_ON(ret);
if (ret) {
err = ret;
goto out_unlock;
}
ins_nr = 0;
}
WARN_ON(ins_nr);
@ -2810,14 +2856,18 @@ next_slot:
btrfs_release_path(root, path);
btrfs_release_path(log, dst_path);
ret = log_directory_changes(trans, root, inode, path, dst_path);
BUG_ON(ret);
if (ret) {
err = ret;
goto out_unlock;
}
}
BTRFS_I(inode)->logged_trans = trans->transid;
out_unlock:
mutex_unlock(&BTRFS_I(inode)->log_mutex);
btrfs_free_path(path);
btrfs_free_path(dst_path);
return 0;
return err;
}
/*
@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
start_log_trans(trans, root);
ret = start_log_trans(trans, root);
if (ret)
goto end_trans;
ret = btrfs_log_inode(trans, root, inode, inode_only);
BUG_ON(ret);
if (ret)
goto end_trans;
/*
* for regular files, if its inode is already on disk, we don't
@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
*/
if (S_ISREG(inode->i_mode) &&
BTRFS_I(inode)->generation <= last_committed &&
BTRFS_I(inode)->last_unlink_trans <= last_committed)
goto no_parent;
BTRFS_I(inode)->last_unlink_trans <= last_committed) {
ret = 0;
goto end_trans;
}
inode_only = LOG_INODE_EXISTS;
while (1) {
@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
ret = btrfs_log_inode(trans, root, inode, inode_only);
BUG_ON(ret);
if (ret)
goto end_trans;
}
if (IS_ROOT(parent))
break;
parent = parent->d_parent;
}
no_parent:
ret = 0;
end_trans:
if (ret < 0) {
BUG_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}
btrfs_end_log_trans(root);
end_no_trans:
return ret;
@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
path = btrfs_alloc_path();
BUG_ON(!path);
trans = btrfs_start_transaction(fs_info->tree_root, 1);
trans = btrfs_start_transaction(fs_info->tree_root, 0);
wc.trans = trans;
wc.pin = 1;

View file

@ -25,6 +25,8 @@
int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry);

View file

@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
if (!path)
return -ENOMEM;
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
lock_chunks(root);
device->barriers = 1;
@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
/* step one, relocate all the extents inside this chunk */
ret = btrfs_relocate_block_group(extent_root, chunk_offset);
BUG_ON(ret);
if (ret)
return ret;
trans = btrfs_start_transaction(root, 1);
trans = btrfs_start_transaction(root, 0);
BUG_ON(!trans);
lock_chunks(root);
@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
break;
BUG_ON(ret);
trans = btrfs_start_transaction(dev_root, 1);
trans = btrfs_start_transaction(dev_root, 0);
BUG_ON(!trans);
ret = btrfs_grow_device(trans, device, old_size);
@ -2094,11 +2095,7 @@ again:
}
/* Shrinking succeeded, else we would be at "done". */
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
goto done;
}
trans = btrfs_start_transaction(root, 0);
lock_chunks(root);
device->disk_total_bytes = new_size;

View file

@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
if (trans)
return do_setxattr(trans, inode, name, value, size, flags);
ret = btrfs_reserve_metadata_space(root, 2);
if (ret)
return ret;
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
goto out;
}
btrfs_set_trans_block_group(trans, inode);
ret = do_setxattr(trans, inode, name, value, size, flags);
@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
BUG_ON(ret);
out:
btrfs_end_transaction_throttle(trans, root);
btrfs_unreserve_metadata_space(root, 2);
return ret;
}

View file

@ -82,6 +82,8 @@ struct dio {
int reap_counter; /* rate limit reaping */
get_block_t *get_block; /* block mapping function */
dio_iodone_t *end_io; /* IO completion function */
dio_submit_t *submit_io; /* IO submition function */
loff_t logical_offset_in_bio; /* current first logical block in bio */
sector_t final_block_in_bio; /* current final block in bio + 1 */
sector_t next_block_for_io; /* next block to be put under IO,
in dio_blocks units */
@ -96,6 +98,7 @@ struct dio {
unsigned cur_page_offset; /* Offset into it, in bytes */
unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
sector_t cur_page_block; /* Where it starts */
loff_t cur_page_fs_offset; /* Offset in file */
/* BIO completion state */
spinlock_t bio_lock; /* protects BIO fields below */
@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
}
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
* @error: Error if there was one
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
void dio_end_io(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
if (dio->is_async)
dio_bio_end_aio(bio, error);
else
dio_bio_end_io(bio, error);
}
EXPORT_SYMBOL_GPL(dio_end_io);
static int
dio_bio_alloc(struct dio *dio, struct block_device *bdev,
sector_t first_sector, int nr_vecs)
@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
bio->bi_end_io = dio_bio_end_io;
dio->bio = bio;
dio->logical_offset_in_bio = dio->cur_page_fs_offset;
return 0;
}
@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio)
if (dio->is_async && dio->rw == READ)
bio_set_pages_dirty(bio);
submit_bio(dio->rw, bio);
if (dio->submit_io)
dio->submit_io(dio->rw, bio, dio->inode,
dio->logical_offset_in_bio);
else
submit_bio(dio->rw, bio);
dio->bio = NULL;
dio->boundary = 0;
dio->logical_offset_in_bio = 0;
}
/*
@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio)
int ret = 0;
if (dio->bio) {
loff_t cur_offset = dio->block_in_file << dio->blkbits;
loff_t bio_next_offset = dio->logical_offset_in_bio +
dio->bio->bi_size;
/*
* See whether this new request is contiguous with the old
* See whether this new request is contiguous with the old.
*
* Btrfs cannot handl having logically non-contiguous requests
* submitted. For exmple if you have
*
* Logical: [0-4095][HOLE][8192-12287]
* Phyiscal: [0-4095] [4096-8181]
*
* We cannot submit those pages together as one BIO. So if our
* current logical offset in the file does not equal what would
* be the next logical offset in the bio, submit the bio we
* have.
*/
if (dio->final_block_in_bio != dio->cur_page_block)
if (dio->final_block_in_bio != dio->cur_page_block ||
cur_offset != bio_next_offset)
dio_bio_submit(dio);
/*
* Submit now if the underlying fs is about to perform a
@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page,
dio->cur_page_offset = offset;
dio->cur_page_len = len;
dio->cur_page_block = blocknr;
dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits;
out:
return ret;
}
@ -935,7 +981,7 @@ static ssize_t
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
struct dio *dio)
dio_submit_t submit_io, struct dio *dio)
{
unsigned long user_addr;
unsigned long flags;
@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio->get_block = get_block;
dio->end_io = end_io;
dio->submit_io = submit_io;
dio->final_block_in_bio = -1;
dio->next_block_for_io = -1;
@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
}
} /* end iovec loop */
if (ret == -ENOTBLK && (rw & WRITE)) {
if (ret == -ENOTBLK) {
/*
* The remaining part of the request will be
* be handled by buffered I/O when we return
@ -1110,7 +1157,7 @@ ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
int flags)
dio_submit_t submit_io, int flags)
{
int seg;
size_t size;
@ -1197,7 +1244,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
(end > i_size_read(inode)));
retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_block, end_io, dio);
nr_segs, blkbits, get_block, end_io,
submit_io, dio);
/*
* In case of error extending write may have instantiated a few

View file

@ -2251,10 +2251,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
#endif
#ifdef CONFIG_BLOCK
struct bio;
typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
loff_t file_offset);
void dio_end_io(struct bio *bio, int error);
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
int lock_type);
dio_submit_t submit_io, int lock_type);
enum {
/* need locking between buffered and direct access */
@ -2270,7 +2275,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
dio_iodone_t end_io)
{
return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
nr_segs, get_block, end_io,
nr_segs, get_block, end_io, NULL,
DIO_LOCKING | DIO_SKIP_HOLES);
}
@ -2280,7 +2285,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
dio_iodone_t end_io)
{
return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
nr_segs, get_block, end_io, 0);
nr_segs, get_block, end_io, NULL, 0);
}
#endif

View file

@ -1275,7 +1275,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
unsigned long seg = 0;
size_t count;
loff_t *ppos = &iocb->ki_pos;
@ -1302,21 +1302,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
retval = mapping->a_ops->direct_IO(READ, iocb,
iov, pos, nr_segs);
}
if (retval > 0)
if (retval > 0) {
*ppos = pos + retval;
if (retval) {
count -= retval;
}
/*
* Btrfs can have a short DIO read if we encounter
* compressed extents, so if there was an error, or if
* we've already read everything we wanted to, or if
* there was a short read because we hit EOF, go ahead
* and return. Otherwise fallthrough to buffered io for
* the rest of the read.
*/
if (retval < 0 || !count || *ppos >= size) {
file_accessed(filp);
goto out;
}
}
}
count = retval;
for (seg = 0; seg < nr_segs; seg++) {
read_descriptor_t desc;
loff_t offset = 0;
/*
* If we did a short DIO read we need to skip the section of the
* iov that we've already read data into.
*/
if (count) {
if (count > iov[seg].iov_len) {
count -= iov[seg].iov_len;
continue;
}
offset = count;
count = 0;
}
desc.written = 0;
desc.arg.buf = iov[seg].iov_base;
desc.count = iov[seg].iov_len;
desc.arg.buf = iov[seg].iov_base + offset;
desc.count = iov[seg].iov_len - offset;
if (desc.count == 0)
continue;
desc.error = 0;