Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: BUG to BUG_ON changes
  Btrfs: remove dead code
  Btrfs: remove dead code
  Btrfs: fix typos in comments
  Btrfs: remove unused ftrace include
  Btrfs: fix __ucmpdi2 compile bug on 32 bit builds
  Btrfs: free inode struct when btrfs_new_inode fails
  Btrfs: fix race in worker_loop
  Btrfs: add flushoncommit mount option
  Btrfs: notreelog mount option
  Btrfs: introduce btrfs_show_options
  Btrfs: rework allocation clustering
  Btrfs: Optimize locking in btrfs_next_leaf()
  Btrfs: break up btrfs_search_slot into smaller pieces
  Btrfs: kill the pinned_mutex
  Btrfs: kill the block group alloc mutex
  Btrfs: clean up find_free_extent
  Btrfs: free space cache cleanups
  Btrfs: unplug in the async bio submission threads
  Btrfs: keep processing bios for a given bdev if our proc is batching
This commit is contained in:
Linus Torvalds 2009-04-03 15:14:44 -07:00
commit b983471794
17 changed files with 994 additions and 556 deletions

View file

@ -20,7 +20,6 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/ftrace.h>
#include "async-thread.h" #include "async-thread.h"
#define WORK_QUEUED_BIT 0 #define WORK_QUEUED_BIT 0
@ -195,6 +194,9 @@ again_locked:
if (!list_empty(&worker->pending)) if (!list_empty(&worker->pending))
continue; continue;
if (kthread_should_stop())
break;
/* still no more work?, sleep for real */ /* still no more work?, sleep for real */
spin_lock_irq(&worker->lock); spin_lock_irq(&worker->lock);
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
@ -208,7 +210,8 @@ again_locked:
worker->working = 0; worker->working = 0;
spin_unlock_irq(&worker->lock); spin_unlock_irq(&worker->lock);
schedule(); if (!kthread_should_stop())
schedule();
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
} }

View file

@ -1244,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* readahead one full node of leaves, finding things that are close * readahead one full node of leaves, finding things that are close
* to the block in 'slot', and triggering ra on them. * to the block in 'slot', and triggering ra on them.
*/ */
static noinline void reada_for_search(struct btrfs_root *root, static void reada_for_search(struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_path *path,
int level, int slot, u64 objectid) int level, int slot, u64 objectid)
{ {
struct extent_buffer *node; struct extent_buffer *node;
struct btrfs_disk_key disk_key; struct btrfs_disk_key disk_key;
@ -1446,6 +1446,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
} }
} }
/*
* helper function for btrfs_search_slot. The goal is to find a block
* in cache without setting the path to blocking. If we find the block
* we return zero and the path is unchanged.
*
* If the block is not cached and up to date, we release the path, do
* some reada (when p->reada is set) and start a read of the block.
* -EAGAIN is returned and the search must be repeated.
*
* @trans:  not referenced in this body
* @root:   passed through to the lookup, readahead and read helpers
* @p:      search path; released on the slow path
* @eb_ret: in: node holding the pointer at 'slot'; out (only when zero
*          is returned): the cached block that pointer refers to
* @level:  level of the node in *eb_ret; the block size is looked up
*          for level - 1
* @slot:   slot in *eb_ret whose block pointer and generation are used
* @key:    only key->objectid is used, as an argument to
*          reada_for_search
*
* Returns 0 on a cache hit, -EAGAIN otherwise.
*/
static int
read_block_for_search(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p,
struct extent_buffer **eb_ret, int level, int slot,
struct btrfs_key *key)
{
u64 blocknr;
u64 gen;
u32 blocksize;
struct extent_buffer *b = *eb_ret;
struct extent_buffer *tmp;
/* read everything we need out of 'b' before the path can be released */
blocknr = btrfs_node_blockptr(b, slot);
gen = btrfs_node_ptr_generation(b, slot);
blocksize = btrfs_level_size(root, level - 1);
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
/*
* fast path: hand the cached buffer to the caller.
* NOTE(review): presumably the caller now owns the reference
* returned by btrfs_find_tree_block -- confirm.
*/
*eb_ret = tmp;
return 0;
}
/*
* reduce lock contention at high levels
* of the btree by dropping locks before
* we read.
*/
btrfs_release_path(NULL, p);
/* a buffer was found but it was not up to date; drop it */
if (tmp)
free_extent_buffer(tmp);
if (p->reada)
reada_for_search(root, p, level, slot, key->objectid);
/*
* start the read and drop the returned buffer right away; the
* caller is expected to find it through the fast path on retry.
*
* NOTE(review): -EAGAIN is returned whether or not read_tree_block
* gave us a buffer. The callers visible in this file retry
* unconditionally on -EAGAIN, so a block that can never be read
* looks like it would be retried forever -- confirm.
*/
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp)
free_extent_buffer(tmp);
return -EAGAIN;
}
/*
* helper function for btrfs_search_slot. This does all of the checks
* for node-level blocks and does any balancing required based on
* the ins_len.
*
* If no extra work was required, zero is returned. If we had to
* drop the path, -EAGAIN is returned and btrfs_search_slot must
* start over
*
* @trans:   passed through to split_node / balance_level
* @root:    tree being searched; also supplies BTRFS_NODEPTRS_PER_BLOCK
* @p:       search path; p->search_for_split and p->nodes[level] are read
* @b:       node at 'level'. It is passed by value, so the reassignments
*           below are not visible to the caller, which has to re-read
*           p->nodes[level] after a zero return.
* @level:   level of 'b' in the path
* @ins_len: > 0 (or p->search_for_split set) selects the split check,
*           < 0 selects the balance check, 0 does nothing
*
* Returns 0 on success (including after a successful split or balance),
* -EAGAIN when the search must be restarted, or the non-zero value
* returned by split_node / balance_level.
*/
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p,
struct extent_buffer *b, int level, int ins_len)
{
int ret;
/* node is within 3 pointers of full: split it before going down */
if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
int sret;
/*
* NOTE(review): a non-zero return presumably means
* reada_for_balance dropped the path -- confirm.
*/
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
/* the path is set blocking only for the duration of the split */
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL);
/* a positive return from split_node is treated as a bug */
BUG_ON(sret > 0);
if (sret) {
ret = sret;
goto done;
}
/* local copy only; it is not read again before the return below */
b = p->nodes[level];
/* deleting and the node is less than a quarter full: try to balance */
} else if (ins_len < 0 && btrfs_header_nritems(b) <
BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
int sret;
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL);
if (sret) {
ret = sret;
goto done;
}
b = p->nodes[level];
/* the path no longer has a node at this level: restart the search */
if (!b) {
btrfs_release_path(NULL, p);
goto again;
}
BUG_ON(btrfs_header_nritems(b) == 1);
}
return 0;
again:
ret = -EAGAIN;
done:
return ret;
}
/* /*
* look for key in the tree. path is filled in with nodes along the way * look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf * if key is found, we return zero and you can find the item in the leaf
@ -1464,16 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
ins_len, int cow) ins_len, int cow)
{ {
struct extent_buffer *b; struct extent_buffer *b;
struct extent_buffer *tmp;
int slot; int slot;
int ret; int ret;
int level; int level;
int should_reada = p->reada;
int lowest_unlock = 1; int lowest_unlock = 1;
int blocksize;
u8 lowest_level = 0; u8 lowest_level = 0;
u64 blocknr;
u64 gen;
lowest_level = p->lowest_level; lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0); WARN_ON(lowest_level && ins_len > 0);
@ -1502,7 +1608,11 @@ again:
if (cow) { if (cow) {
int wret; int wret;
/* is a cow on this block not required */ /*
* if we don't really need to cow this block
* then we don't want to set the path blocking,
* so we test it here
*/
if (btrfs_header_generation(b) == trans->transid && if (btrfs_header_generation(b) == trans->transid &&
btrfs_header_owner(b) == root->root_key.objectid && btrfs_header_owner(b) == root->root_key.objectid &&
!btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
@ -1557,51 +1667,15 @@ cow_done:
if (ret && slot > 0) if (ret && slot > 0)
slot -= 1; slot -= 1;
p->slots[level] = slot; p->slots[level] = slot;
if ((p->search_for_split || ins_len > 0) && ret = setup_nodes_for_search(trans, root, p, b, level,
btrfs_header_nritems(b) >= ins_len);
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { if (ret == -EAGAIN)
int sret; goto again;
else if (ret)
goto done;
b = p->nodes[level];
slot = p->slots[level];
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0);
if (sret) {
ret = sret;
goto done;
}
b = p->nodes[level];
slot = p->slots[level];
} else if (ins_len < 0 &&
btrfs_header_nritems(b) <
BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
int sret;
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL);
if (sret) {
ret = sret;
goto done;
}
b = p->nodes[level];
if (!b) {
btrfs_release_path(NULL, p);
goto again;
}
slot = p->slots[level];
BUG_ON(btrfs_header_nritems(b) == 1);
}
unlock_up(p, level, lowest_unlock); unlock_up(p, level, lowest_unlock);
/* this is only true while dropping a snapshot */ /* this is only true while dropping a snapshot */
@ -1610,44 +1684,11 @@ cow_done:
goto done; goto done;
} }
blocknr = btrfs_node_blockptr(b, slot); ret = read_block_for_search(trans, root, p,
gen = btrfs_node_ptr_generation(b, slot); &b, level, slot, key);
blocksize = btrfs_level_size(root, level - 1); if (ret == -EAGAIN)
goto again;
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
b = tmp;
} else {
/*
* reduce lock contention at high levels
* of the btree by dropping locks before
* we read.
*/
if (level > 0) {
btrfs_release_path(NULL, p);
if (tmp)
free_extent_buffer(tmp);
if (should_reada)
reada_for_search(root, p,
level, slot,
key->objectid);
tmp = read_tree_block(root, blocknr,
blocksize, gen);
if (tmp)
free_extent_buffer(tmp);
goto again;
} else {
btrfs_set_path_blocking(p);
if (tmp)
free_extent_buffer(tmp);
if (should_reada)
reada_for_search(root, p,
level, slot,
key->objectid);
b = read_node_slot(root, b, slot);
}
}
if (!p->skip_locking) { if (!p->skip_locking) {
int lret; int lret;
@ -2116,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
BUG_ON(!path->nodes[level]); BUG_ON(!path->nodes[level]);
lower = path->nodes[level]; lower = path->nodes[level];
nritems = btrfs_header_nritems(lower); nritems = btrfs_header_nritems(lower);
if (slot > nritems) BUG_ON(slot > nritems);
BUG();
if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
BUG(); BUG();
if (slot != nritems) { if (slot != nritems) {
@ -4086,28 +4126,44 @@ next:
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{ {
int slot; int slot;
int level = 1; int level;
struct extent_buffer *c; struct extent_buffer *c;
struct extent_buffer *next = NULL; struct extent_buffer *next;
struct btrfs_key key; struct btrfs_key key;
u32 nritems; u32 nritems;
int ret; int ret;
int old_spinning = path->leave_spinning;
int force_blocking = 0;
nritems = btrfs_header_nritems(path->nodes[0]); nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0) if (nritems == 0)
return 1; return 1;
btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); /*
* we take the blocks in an order that upsets lockdep. Using
* blocking mode is the only way around it.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
force_blocking = 1;
#endif
btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
level = 1;
next = NULL;
btrfs_release_path(root, path); btrfs_release_path(root, path);
path->keep_locks = 1; path->keep_locks = 1;
if (!force_blocking)
path->leave_spinning = 1;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
path->keep_locks = 0; path->keep_locks = 0;
if (ret < 0) if (ret < 0)
return ret; return ret;
btrfs_set_path_blocking(path);
nritems = btrfs_header_nritems(path->nodes[0]); nritems = btrfs_header_nritems(path->nodes[0]);
/* /*
* by releasing the path above we dropped all our locks. A balance * by releasing the path above we dropped all our locks. A balance
@ -4117,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
*/ */
if (nritems > 0 && path->slots[0] < nritems - 1) { if (nritems > 0 && path->slots[0] < nritems - 1) {
path->slots[0]++; path->slots[0]++;
ret = 0;
goto done; goto done;
} }
while (level < BTRFS_MAX_LEVEL) { while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) if (!path->nodes[level]) {
return 1; ret = 1;
goto done;
}
slot = path->slots[level] + 1; slot = path->slots[level] + 1;
c = path->nodes[level]; c = path->nodes[level];
if (slot >= btrfs_header_nritems(c)) { if (slot >= btrfs_header_nritems(c)) {
level++; level++;
if (level == BTRFS_MAX_LEVEL) if (level == BTRFS_MAX_LEVEL) {
return 1; ret = 1;
goto done;
}
continue; continue;
} }
@ -4138,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
free_extent_buffer(next); free_extent_buffer(next);
} }
/* the path was set to blocking above */ next = c;
if (level == 1 && (path->locks[1] || path->skip_locking) && ret = read_block_for_search(NULL, root, path, &next, level,
path->reada) slot, &key);
reada_for_search(root, path, level, slot, 0); if (ret == -EAGAIN)
goto again;
next = read_node_slot(root, c, slot);
if (!path->skip_locking) { if (!path->skip_locking) {
btrfs_assert_tree_locked(c); ret = btrfs_try_spin_lock(next);
btrfs_tree_lock(next); if (!ret) {
btrfs_set_lock_blocking(next); btrfs_set_path_blocking(path);
btrfs_tree_lock(next);
if (!force_blocking)
btrfs_clear_path_blocking(path, next);
}
if (force_blocking)
btrfs_set_lock_blocking(next);
} }
break; break;
} }
@ -4157,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
c = path->nodes[level]; c = path->nodes[level];
if (path->locks[level]) if (path->locks[level])
btrfs_tree_unlock(c); btrfs_tree_unlock(c);
free_extent_buffer(c); free_extent_buffer(c);
path->nodes[level] = next; path->nodes[level] = next;
path->slots[level] = 0; path->slots[level] = 0;
if (!path->skip_locking) if (!path->skip_locking)
path->locks[level] = 1; path->locks[level] = 1;
if (!level) if (!level)
break; break;
btrfs_set_path_blocking(path); ret = read_block_for_search(NULL, root, path, &next, level,
if (level == 1 && path->locks[1] && path->reada) 0, &key);
reada_for_search(root, path, level, slot, 0); if (ret == -EAGAIN)
next = read_node_slot(root, next, 0); goto again;
if (!path->skip_locking) { if (!path->skip_locking) {
btrfs_assert_tree_locked(path->nodes[level]); btrfs_assert_tree_locked(path->nodes[level]);
btrfs_tree_lock(next); ret = btrfs_try_spin_lock(next);
btrfs_set_lock_blocking(next); if (!ret) {
btrfs_set_path_blocking(path);
btrfs_tree_lock(next);
if (!force_blocking)
btrfs_clear_path_blocking(path, next);
}
if (force_blocking)
btrfs_set_lock_blocking(next);
} }
} }
ret = 0;
done: done:
unlock_up(path, 0, 1); unlock_up(path, 0, 1);
return 0; path->leave_spinning = old_spinning;
if (!old_spinning)
btrfs_set_path_blocking(path);
return ret;
} }
/* /*

View file

@ -143,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_FT_MAX 9 #define BTRFS_FT_MAX 9
/* /*
* the key defines the order in the tree, and so it also defines (optimal) * The key defines the order in the tree, and so it also defines (optimal)
* block layout. objectid corresonds to the inode number. The flags * block layout.
* tells us things about the object, and is a kind of stream selector. *
* so for a given inode, keys with flags of 1 might refer to the inode * objectid corresponds to the inode number.
* data, flags of 2 may point to file data in the btree and flags == 3 *
* may point to extents. * type tells us things about the object, and is a kind of stream selector.
* so for a given inode, keys with type of 1 might refer to the inode data,
* type of 2 may point to file data in the btree and type == 3 may point to
* extents.
* *
* offset is the starting byte offset for this key in the stream. * offset is the starting byte offset for this key in the stream.
* *
@ -200,7 +203,7 @@ struct btrfs_dev_item {
/* /*
* starting byte of this partition on the device, * starting byte of this partition on the device,
* to allowr for stripe alignment in the future * to allow for stripe alignment in the future
*/ */
__le64 start_offset; __le64 start_offset;
@ -633,18 +636,35 @@ struct btrfs_space_info {
struct rw_semaphore groups_sem; struct rw_semaphore groups_sem;
}; };
struct btrfs_free_space { /*
struct rb_node bytes_index; * free clusters are used to claim free space in relatively large chunks,
struct rb_node offset_index; * allowing us to do less seeky writes. They are used for all metadata
u64 offset; * allocations and data allocations in ssd mode.
u64 bytes; */
struct btrfs_free_cluster {
spinlock_t lock;
spinlock_t refill_lock;
struct rb_root root;
/* largest extent in this cluster */
u64 max_size;
/* first extent starting offset */
u64 window_start;
struct btrfs_block_group_cache *block_group;
/*
* when a cluster is allocated from a block group, we put the
* cluster onto a list in the block group so that it can
* be freed before the block group is freed.
*/
struct list_head block_group_list;
}; };
struct btrfs_block_group_cache { struct btrfs_block_group_cache {
struct btrfs_key key; struct btrfs_key key;
struct btrfs_block_group_item item; struct btrfs_block_group_item item;
spinlock_t lock; spinlock_t lock;
struct mutex alloc_mutex;
struct mutex cache_mutex; struct mutex cache_mutex;
u64 pinned; u64 pinned;
u64 reserved; u64 reserved;
@ -656,6 +676,7 @@ struct btrfs_block_group_cache {
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
/* free space cache stuff */ /* free space cache stuff */
spinlock_t tree_lock;
struct rb_root free_space_bytes; struct rb_root free_space_bytes;
struct rb_root free_space_offset; struct rb_root free_space_offset;
@ -667,6 +688,11 @@ struct btrfs_block_group_cache {
/* usage count */ /* usage count */
atomic_t count; atomic_t count;
/* List of struct btrfs_free_clusters for this block group.
* Today it will only have one thing on it, but that may change
*/
struct list_head cluster_list;
}; };
struct btrfs_leaf_ref_tree { struct btrfs_leaf_ref_tree {
@ -728,7 +754,6 @@ struct btrfs_fs_info {
struct mutex tree_log_mutex; struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex; struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex; struct mutex cleaner_mutex;
struct mutex pinned_mutex;
struct mutex chunk_mutex; struct mutex chunk_mutex;
struct mutex drop_mutex; struct mutex drop_mutex;
struct mutex volume_mutex; struct mutex volume_mutex;
@ -839,8 +864,12 @@ struct btrfs_fs_info {
spinlock_t delalloc_lock; spinlock_t delalloc_lock;
spinlock_t new_trans_lock; spinlock_t new_trans_lock;
u64 delalloc_bytes; u64 delalloc_bytes;
u64 last_alloc;
u64 last_data_alloc; /* data_alloc_cluster is only used in ssd mode */
struct btrfs_free_cluster data_alloc_cluster;
/* all metadata allocations go through this cluster */
struct btrfs_free_cluster meta_alloc_cluster;
spinlock_t ref_cache_lock; spinlock_t ref_cache_lock;
u64 total_ref_cache_size; u64 total_ref_cache_size;
@ -932,7 +961,6 @@ struct btrfs_root {
}; };
/* /*
* inode items have the data typically returned from stat and store other * inode items have the data typically returned from stat and store other
* info about object characteristics. There is one for every file and dir in * info about object characteristics. There is one for every file and dir in
* the FS * the FS
@ -963,7 +991,7 @@ struct btrfs_root {
#define BTRFS_EXTENT_CSUM_KEY 128 #define BTRFS_EXTENT_CSUM_KEY 128
/* /*
* root items point to tree roots. There are typically in the root * root items point to tree roots. They are typically in the root
* tree used by the super block to find all the other trees * tree used by the super block to find all the other trees
*/ */
#define BTRFS_ROOT_ITEM_KEY 132 #define BTRFS_ROOT_ITEM_KEY 132
@ -1010,6 +1038,8 @@ struct btrfs_root {
#define BTRFS_MOUNT_SSD (1 << 3) #define BTRFS_MOUNT_SSD (1 << 3)
#define BTRFS_MOUNT_DEGRADED (1 << 4) #define BTRFS_MOUNT_DEGRADED (1 << 4)
#define BTRFS_MOUNT_COMPRESS (1 << 5) #define BTRFS_MOUNT_COMPRESS (1 << 5)
#define BTRFS_MOUNT_NOTREELOG (1 << 6)
#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@ -1748,6 +1778,7 @@ static inline struct dentry *fdentry(struct file *file)
} }
/* extent-tree.c */ /* extent-tree.c */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count); struct btrfs_root *root, unsigned long count);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
@ -2174,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
int btrfs_init_acl(struct inode *inode, struct inode *dir); int btrfs_init_acl(struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode); int btrfs_acl_chmod(struct inode *inode);
/* free-space-cache.c */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
*block_group, u64 offset,
u64 bytes);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
#endif #endif

View file

@ -18,7 +18,6 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <linux/ftrace.h>
#include "ctree.h" #include "ctree.h"
#include "delayed-ref.h" #include "delayed-ref.h"
#include "transaction.h" #include "transaction.h"

View file

@ -38,6 +38,7 @@
#include "locking.h" #include "locking.h"
#include "ref-cache.h" #include "ref-cache.h"
#include "tree-log.h" #include "tree-log.h"
#include "free-space-cache.h"
static struct extent_io_ops btree_extent_io_ops; static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work); static void end_workqueue_fn(struct btrfs_work *work);
@ -1412,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio)
ret = extent_range_uptodate(io_tree, start + length, ret = extent_range_uptodate(io_tree, start + length,
start + buf_len - 1); start + buf_len - 1);
if (ret == 1)
return ret;
return ret; return ret;
} }
@ -1647,12 +1646,15 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->drop_mutex);
mutex_init(&fs_info->pinned_mutex);
mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex); mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->tree_reloc_mutex); mutex_init(&fs_info->tree_reloc_mutex);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->async_submit_wait);

View file

@ -31,6 +31,7 @@
#include "volumes.h" #include "volumes.h"
#include "locking.h" #include "locking.h"
#include "ref-cache.h" #include "ref-cache.h"
#include "free-space-cache.h"
#define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1 #define PENDING_EXTENT_DELETE 1
@ -166,7 +167,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
u64 extent_start, extent_end, size; u64 extent_start, extent_end, size;
int ret; int ret;
mutex_lock(&info->pinned_mutex);
while (start < end) { while (start < end) {
ret = find_first_extent_bit(&info->pinned_extents, start, ret = find_first_extent_bit(&info->pinned_extents, start,
&extent_start, &extent_end, &extent_start, &extent_end,
@ -192,7 +192,6 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
ret = btrfs_add_free_space(block_group, start, size); ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret); BUG_ON(ret);
} }
mutex_unlock(&info->pinned_mutex);
return 0; return 0;
} }
@ -291,8 +290,8 @@ next:
block_group->key.objectid + block_group->key.objectid +
block_group->key.offset); block_group->key.offset);
remove_sb_from_cache(root, block_group);
block_group->cached = 1; block_group->cached = 1;
remove_sb_from_cache(root, block_group);
ret = 0; ret = 0;
err: err:
btrfs_free_path(path); btrfs_free_path(path);
@ -326,7 +325,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
return cache; return cache;
} }
static inline void put_block_group(struct btrfs_block_group_cache *cache) void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{ {
if (atomic_dec_and_test(&cache->count)) if (atomic_dec_and_test(&cache->count))
kfree(cache); kfree(cache);
@ -399,12 +398,12 @@ again:
div_factor(cache->key.offset, factor)) { div_factor(cache->key.offset, factor)) {
group_start = cache->key.objectid; group_start = cache->key.objectid;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
put_block_group(cache); btrfs_put_block_group(cache);
goto found; goto found;
} }
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
put_block_group(cache); btrfs_put_block_group(cache);
cond_resched(); cond_resched();
} }
if (!wrapped) { if (!wrapped) {
@ -1594,7 +1593,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
if (!block_group || block_group->ro) if (!block_group || block_group->ro)
readonly = 1; readonly = 1;
if (block_group) if (block_group)
put_block_group(block_group); btrfs_put_block_group(block_group);
return readonly; return readonly;
} }
@ -2018,7 +2017,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
WARN_ON(ret); WARN_ON(ret);
} }
} }
put_block_group(cache); btrfs_put_block_group(cache);
total -= num_bytes; total -= num_bytes;
bytenr += num_bytes; bytenr += num_bytes;
} }
@ -2035,7 +2034,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return 0; return 0;
bytenr = cache->key.objectid; bytenr = cache->key.objectid;
put_block_group(cache); btrfs_put_block_group(cache);
return bytenr; return bytenr;
} }
@ -2047,7 +2046,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
if (pin) { if (pin) {
set_extent_dirty(&fs_info->pinned_extents, set_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS); bytenr, bytenr + num - 1, GFP_NOFS);
@ -2055,7 +2053,6 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
clear_extent_dirty(&fs_info->pinned_extents, clear_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS); bytenr, bytenr + num - 1, GFP_NOFS);
} }
mutex_unlock(&root->fs_info->pinned_mutex);
while (num > 0) { while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr); cache = btrfs_lookup_block_group(fs_info, bytenr);
@ -2081,7 +2078,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
if (cache->cached) if (cache->cached)
btrfs_add_free_space(cache, bytenr, len); btrfs_add_free_space(cache, bytenr, len);
} }
put_block_group(cache); btrfs_put_block_group(cache);
bytenr += len; bytenr += len;
num -= len; num -= len;
} }
@ -2112,7 +2109,7 @@ static int update_reserved_extents(struct btrfs_root *root,
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock); spin_unlock(&cache->space_info->lock);
put_block_group(cache); btrfs_put_block_group(cache);
bytenr += len; bytenr += len;
num -= len; num -= len;
} }
@ -2127,7 +2124,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
int ret; int ret;
mutex_lock(&root->fs_info->pinned_mutex);
while (1) { while (1) {
ret = find_first_extent_bit(pinned_extents, last, ret = find_first_extent_bit(pinned_extents, last,
&start, &end, EXTENT_DIRTY); &start, &end, EXTENT_DIRTY);
@ -2136,7 +2132,6 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
set_extent_dirty(copy, start, end, GFP_NOFS); set_extent_dirty(copy, start, end, GFP_NOFS);
last = end + 1; last = end + 1;
} }
mutex_unlock(&root->fs_info->pinned_mutex);
return 0; return 0;
} }
@ -2149,7 +2144,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int ret; int ret;
while (1) { while (1) {
mutex_lock(&root->fs_info->pinned_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end, ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY); EXTENT_DIRTY);
if (ret) if (ret)
@ -2163,7 +2157,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched(); cond_resched();
} }
mutex_unlock(&root->fs_info->pinned_mutex);
return ret; return ret;
} }
@ -2205,7 +2198,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
free_extent_buffer(buf); free_extent_buffer(buf);
pinit: pinit:
btrfs_set_path_blocking(path); btrfs_set_path_blocking(path);
mutex_lock(&root->fs_info->pinned_mutex);
/* unlocks the pinned mutex */ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
@ -2511,8 +2503,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
*/ */
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
mutex_lock(&root->fs_info->pinned_mutex);
/* unlocks the pinned mutex */ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
update_reserved_extents(root, bytenr, num_bytes, 0); update_reserved_extents(root, bytenr, num_bytes, 0);
@ -2554,228 +2544,237 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
{ {
int ret = 0; int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root; struct btrfs_root *root = orig_root->fs_info->extent_root;
u64 total_needed = num_bytes; struct btrfs_free_cluster *last_ptr = NULL;
u64 *last_ptr = NULL;
u64 last_wanted = 0;
struct btrfs_block_group_cache *block_group = NULL; struct btrfs_block_group_cache *block_group = NULL;
int chunk_alloc_done = 0;
int empty_cluster = 2 * 1024 * 1024; int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0; int allowed_chunk_alloc = 0;
struct list_head *head = NULL, *cur = NULL;
int loop = 0;
int extra_loop = 0;
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
int last_ptr_loop = 0;
int loop = 0;
WARN_ON(num_bytes < root->sectorsize); WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
ins->objectid = 0; ins->objectid = 0;
ins->offset = 0; ins->offset = 0;
space_info = __find_space_info(root->fs_info, data);
if (orig_root->ref_cows || empty_size) if (orig_root->ref_cows || empty_size)
allowed_chunk_alloc = 1; allowed_chunk_alloc = 1;
if (data & BTRFS_BLOCK_GROUP_METADATA) { if (data & BTRFS_BLOCK_GROUP_METADATA) {
last_ptr = &root->fs_info->last_alloc; last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD)) if (!btrfs_test_opt(root, SSD))
empty_cluster = 64 * 1024; empty_cluster = 64 * 1024;
} }
if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->last_data_alloc; last_ptr = &root->fs_info->data_alloc_cluster;
}
if (last_ptr) { if (last_ptr) {
if (*last_ptr) { spin_lock(&last_ptr->lock);
hint_byte = *last_ptr; if (last_ptr->block_group)
last_wanted = *last_ptr; hint_byte = last_ptr->window_start;
} else spin_unlock(&last_ptr->lock);
empty_size += empty_cluster;
} else {
empty_cluster = 0;
} }
search_start = max(search_start, first_logical_byte(root, 0)); search_start = max(search_start, first_logical_byte(root, 0));
search_start = max(search_start, hint_byte); search_start = max(search_start, hint_byte);
if (last_wanted && search_start != last_wanted) { if (!last_ptr) {
last_wanted = 0; empty_cluster = 0;
empty_size += empty_cluster; loop = 1;
} }
total_needed += empty_size; if (search_start == hint_byte) {
block_group = btrfs_lookup_block_group(root->fs_info, search_start); block_group = btrfs_lookup_block_group(root->fs_info,
if (!block_group) search_start);
block_group = btrfs_lookup_first_block_group(root->fs_info, if (block_group && block_group_bits(block_group, data)) {
search_start); down_read(&space_info->groups_sem);
space_info = __find_space_info(root->fs_info, data); goto have_block_group;
} else if (block_group) {
btrfs_put_block_group(block_group);
}
}
search:
down_read(&space_info->groups_sem); down_read(&space_info->groups_sem);
while (1) { list_for_each_entry(block_group, &space_info->block_groups, list) {
struct btrfs_free_space *free_space; u64 offset;
/*
* the only way this happens if our hint points to a block
* group thats not of the proper type, while looping this
* should never happen
*/
if (empty_size)
extra_loop = 1;
if (!block_group) atomic_inc(&block_group->count);
goto new_group_no_lock; search_start = block_group->key.objectid;
have_block_group:
if (unlikely(!block_group->cached)) { if (unlikely(!block_group->cached)) {
mutex_lock(&block_group->cache_mutex); mutex_lock(&block_group->cache_mutex);
ret = cache_block_group(root, block_group); ret = cache_block_group(root, block_group);
mutex_unlock(&block_group->cache_mutex); mutex_unlock(&block_group->cache_mutex);
if (ret) if (ret) {
btrfs_put_block_group(block_group);
break; break;
}
} }
mutex_lock(&block_group->alloc_mutex);
if (unlikely(!block_group_bits(block_group, data)))
goto new_group;
if (unlikely(block_group->ro)) if (unlikely(block_group->ro))
goto new_group; goto loop;
free_space = btrfs_find_free_space(block_group, search_start, if (last_ptr) {
total_needed); /*
if (free_space) { * the refill lock keeps out other
u64 start = block_group->key.objectid; * people trying to start a new cluster
u64 end = block_group->key.objectid +
block_group->key.offset;
search_start = stripe_align(root, free_space->offset);
/* move on to the next group */
if (search_start + num_bytes >= search_end)
goto new_group;
/* move on to the next group */
if (search_start + num_bytes > end)
goto new_group;
if (last_wanted && search_start != last_wanted) {
total_needed += empty_cluster;
empty_size += empty_cluster;
last_wanted = 0;
/*
* if search_start is still in this block group
* then we just re-search this block group
*/
if (search_start >= start &&
search_start < end) {
mutex_unlock(&block_group->alloc_mutex);
continue;
}
/* else we go to the next block group */
goto new_group;
}
if (exclude_nr > 0 &&
(search_start + num_bytes > exclude_start &&
search_start < exclude_start + exclude_nr)) {
search_start = exclude_start + exclude_nr;
/*
* if search_start is still in this block group
* then we just re-search this block group
*/
if (search_start >= start &&
search_start < end) {
mutex_unlock(&block_group->alloc_mutex);
last_wanted = 0;
continue;
}
/* else we go to the next block group */
goto new_group;
}
ins->objectid = search_start;
ins->offset = num_bytes;
btrfs_remove_free_space_lock(block_group, search_start,
num_bytes);
/* we are all good, lets return */
mutex_unlock(&block_group->alloc_mutex);
break;
}
new_group:
mutex_unlock(&block_group->alloc_mutex);
put_block_group(block_group);
block_group = NULL;
new_group_no_lock:
/* don't try to compare new allocations against the
* last allocation any more
*/
last_wanted = 0;
/*
* Here's how this works.
* loop == 0: we were searching a block group via a hint
* and didn't find anything, so we start at
* the head of the block groups and keep searching
* loop == 1: we're searching through all of the block groups
* if we hit the head again we have searched
* all of the block groups for this space and we
* need to try and allocate, if we cant error out.
* loop == 2: we allocated more space and are looping through
* all of the block groups again.
*/
if (loop == 0) {
head = &space_info->block_groups;
cur = head->next;
loop++;
} else if (loop == 1 && cur == head) {
int keep_going;
/* at this point we give up on the empty_size
* allocations and just try to allocate the min
* space.
*
* The extra_loop field was set if an empty_size
* allocation was attempted above, and if this
* is try we need to try the loop again without
* the additional empty_size.
*/ */
total_needed -= empty_size; spin_lock(&last_ptr->refill_lock);
empty_size = 0; offset = btrfs_alloc_from_cluster(block_group, last_ptr,
keep_going = extra_loop; num_bytes, search_start);
loop++; if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
if (allowed_chunk_alloc && !chunk_alloc_done) { spin_lock(&last_ptr->lock);
up_read(&space_info->groups_sem); /*
ret = do_chunk_alloc(trans, root, num_bytes + * whoops, this cluster doesn't actually point to
2 * 1024 * 1024, data, 1); * this block group. Get a ref on the block
down_read(&space_info->groups_sem); * group is does point to and try again
if (ret < 0) */
goto loop_check; if (!last_ptr_loop && last_ptr->block_group &&
head = &space_info->block_groups; last_ptr->block_group != block_group) {
btrfs_put_block_group(block_group);
block_group = last_ptr->block_group;
atomic_inc(&block_group->count);
spin_unlock(&last_ptr->lock);
spin_unlock(&last_ptr->refill_lock);
last_ptr_loop = 1;
search_start = block_group->key.objectid;
goto have_block_group;
}
spin_unlock(&last_ptr->lock);
/*
* this cluster didn't work out, free it and
* start over
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
last_ptr_loop = 0;
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans,
block_group, last_ptr,
offset, num_bytes,
empty_cluster + empty_size);
if (ret == 0) {
/* /*
* we've allocated a new chunk, keep * now pull our allocation out of this
* trying * cluster
*/ */
keep_going = 1; offset = btrfs_alloc_from_cluster(block_group,
chunk_alloc_done = 1; last_ptr, num_bytes,
} else if (!allowed_chunk_alloc) { search_start);
space_info->force_alloc = 1; if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
} }
loop_check: /*
if (keep_going) { * at this point we either didn't find a cluster
cur = head->next; * or we weren't able to allocate a block from our
extra_loop = 0; * cluster. Free the cluster we've been trying
} else { * to use, and go to the next block group
break; */
if (loop < 2) {
btrfs_return_cluster_to_free_space(NULL,
last_ptr);
spin_unlock(&last_ptr->refill_lock);
goto loop;
} }
} else if (cur == head) { spin_unlock(&last_ptr->refill_lock);
break;
} }
block_group = list_entry(cur, struct btrfs_block_group_cache, offset = btrfs_find_space_for_alloc(block_group, search_start,
list); num_bytes, empty_size);
atomic_inc(&block_group->count); if (!offset)
goto loop;
checks:
search_start = stripe_align(root, offset);
search_start = block_group->key.objectid; /* move on to the next group */
cur = cur->next; if (search_start + num_bytes >= search_end) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
}
/* move on to the next group */
if (search_start + num_bytes >
block_group->key.objectid + block_group->key.offset) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
}
if (exclude_nr > 0 &&
(search_start + num_bytes > exclude_start &&
search_start < exclude_start + exclude_nr)) {
search_start = exclude_start + exclude_nr;
btrfs_add_free_space(block_group, offset, num_bytes);
/*
* if search_start is still in this block group
* then we just re-search this block group
*/
if (search_start >= block_group->key.objectid &&
search_start < (block_group->key.objectid +
block_group->key.offset))
goto have_block_group;
goto loop;
}
ins->objectid = search_start;
ins->offset = num_bytes;
if (offset < search_start)
btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
/* we are all good, lets return */
break;
loop:
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
/* loop == 0, try to find a clustered alloc in every block group
* loop == 1, try again after forcing a chunk allocation
* loop == 2, set empty_size and empty_cluster to 0 and try again
*/
if (!ins->objectid && loop < 3 &&
(empty_size || empty_cluster || allowed_chunk_alloc)) {
if (loop >= 2) {
empty_size = 0;
empty_cluster = 0;
}
if (allowed_chunk_alloc) {
ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data, 1);
allowed_chunk_alloc = 0;
} else {
space_info->force_alloc = 1;
}
if (loop < 3) {
loop++;
goto search;
}
ret = -ENOSPC;
} else if (!ins->objectid) {
ret = -ENOSPC;
} }
/* we found what we needed */ /* we found what we needed */
@ -2783,21 +2782,10 @@ loop_check:
if (!(data & BTRFS_BLOCK_GROUP_DATA)) if (!(data & BTRFS_BLOCK_GROUP_DATA))
trans->block_group = block_group->key.objectid; trans->block_group = block_group->key.objectid;
if (last_ptr) btrfs_put_block_group(block_group);
*last_ptr = ins->objectid + ins->offset;
ret = 0; ret = 0;
} else if (!ret) {
printk(KERN_ERR "btrfs searching for %llu bytes, "
"num_bytes %llu, loop %d, allowed_alloc %d\n",
(unsigned long long)total_needed,
(unsigned long long)num_bytes,
loop, allowed_chunk_alloc);
ret = -ENOSPC;
} }
if (block_group)
put_block_group(block_group);
up_read(&space_info->groups_sem);
return ret; return ret;
} }
@ -2902,7 +2890,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
ret = btrfs_discard_extent(root, start, len); ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len); btrfs_add_free_space(cache, start, len);
put_block_group(cache); btrfs_put_block_group(cache);
update_reserved_extents(root, start, len, 0); update_reserved_extents(root, start, len, 0);
return ret; return ret;
@ -3040,7 +3028,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
ret = btrfs_remove_free_space(block_group, ins->objectid, ret = btrfs_remove_free_space(block_group, ins->objectid,
ins->offset); ins->offset);
BUG_ON(ret); BUG_ON(ret);
put_block_group(block_group); btrfs_put_block_group(block_group);
ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
ref_generation, owner, ins, 1); ref_generation, owner, ins, 1);
return ret; return ret;
@ -5729,7 +5717,7 @@ next:
WARN_ON(block_group->reserved > 0); WARN_ON(block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&block_group->item) > 0); WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
put_block_group(block_group); btrfs_put_block_group(block_group);
ret = 0; ret = 0;
out: out:
btrfs_free_path(path); btrfs_free_path(path);
@ -5856,9 +5844,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
atomic_set(&cache->count, 1); atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock); spin_lock_init(&cache->lock);
mutex_init(&cache->alloc_mutex); spin_lock_init(&cache->tree_lock);
mutex_init(&cache->cache_mutex); mutex_init(&cache->cache_mutex);
INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
read_extent_buffer(leaf, &cache->item, read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]), btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item)); sizeof(cache->item));
@ -5912,9 +5901,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
atomic_set(&cache->count, 1); atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock); spin_lock_init(&cache->lock);
mutex_init(&cache->alloc_mutex); spin_lock_init(&cache->tree_lock);
mutex_init(&cache->cache_mutex); mutex_init(&cache->cache_mutex);
INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
btrfs_set_block_group_used(&cache->item, bytes_used); btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
@ -5974,8 +5964,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->space_info->lock); spin_unlock(&block_group->space_info->lock);
block_group->space_info->full = 0; block_group->space_info->full = 0;
put_block_group(block_group); btrfs_put_block_group(block_group);
put_block_group(block_group); btrfs_put_block_group(block_group);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1); ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) if (ret > 0)

View file

@ -2884,25 +2884,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
disko = 0; disko = 0;
flags = 0; flags = 0;
switch (em->block_start) { if (em->block_start == EXTENT_MAP_LAST_BYTE) {
case EXTENT_MAP_LAST_BYTE:
end = 1; end = 1;
flags |= FIEMAP_EXTENT_LAST; flags |= FIEMAP_EXTENT_LAST;
break; } else if (em->block_start == EXTENT_MAP_HOLE) {
case EXTENT_MAP_HOLE:
flags |= FIEMAP_EXTENT_UNWRITTEN; flags |= FIEMAP_EXTENT_UNWRITTEN;
break; } else if (em->block_start == EXTENT_MAP_INLINE) {
case EXTENT_MAP_INLINE:
flags |= (FIEMAP_EXTENT_DATA_INLINE | flags |= (FIEMAP_EXTENT_DATA_INLINE |
FIEMAP_EXTENT_NOT_ALIGNED); FIEMAP_EXTENT_NOT_ALIGNED);
break; } else if (em->block_start == EXTENT_MAP_DELALLOC) {
case EXTENT_MAP_DELALLOC:
flags |= (FIEMAP_EXTENT_DELALLOC | flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN); FIEMAP_EXTENT_UNKNOWN);
break; } else {
default:
disko = em->block_start; disko = em->block_start;
break;
} }
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED; flags |= FIEMAP_EXTENT_ENCODED;

View file

@ -234,7 +234,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
rb = tree_insert(&tree->map, em->start, &em->rb_node); rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) { if (rb) {
ret = -EEXIST; ret = -EEXIST;
free_extent_map(merge);
goto out; goto out;
} }
atomic_inc(&em->refs); atomic_inc(&em->refs);

View file

@ -18,6 +18,15 @@
#include <linux/sched.h> #include <linux/sched.h>
#include "ctree.h" #include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
/*
 * One contiguous range of free space tracked for a block group.
 *
 * An entry is linked into the block group's free_space_offset and
 * free_space_bytes rbtrees by link_free_space().  While an entry is held
 * by a btrfs_free_cluster, its offset_index node lives in the cluster's
 * rbtree instead (see __btrfs_return_cluster_to_free_space()).
 */
struct btrfs_free_space {
/* node in the block group's free_space_bytes tree (presumably keyed by bytes) */
struct rb_node bytes_index;
/* node in the block group's free_space_offset tree, or in a cluster's tree */
struct rb_node offset_index;
/* start of the free range */
u64 offset;
/* length of the free range; link_free_space() BUG()s if this is zero */
u64 bytes;
};
static int tree_insert_offset(struct rb_root *root, u64 offset, static int tree_insert_offset(struct rb_root *root, u64 offset,
struct rb_node *node) struct rb_node *node)
@ -68,14 +77,24 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
} }
/* /*
* searches the tree for the given offset. If contains is set we will return * searches the tree for the given offset.
* the free space that contains the given offset. If contains is not set we *
* will return the free space that starts at or after the given offset and is * fuzzy == 1: this is used for allocations where we are given a hint of where
* at least bytes long. * to look for free space. Because the hint may not be completely on an offset
* mark, or the hint may no longer point to free space we need to fudge our
* results a bit. So we look for free space starting at or after offset with at
* least bytes size. We prefer to find as close to the given offset as we can.
* Also if the offset is within a free space range, then we will return the free
* space that contains the given offset, which means we can return a free space
* chunk with an offset before the provided offset.
*
* fuzzy == 0: this is just a normal tree search. Give us the free space that
* starts at the given offset which is at least bytes size, and if its not there
* return NULL.
*/ */
static struct btrfs_free_space *tree_search_offset(struct rb_root *root, static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
u64 offset, u64 bytes, u64 offset, u64 bytes,
int contains) int fuzzy)
{ {
struct rb_node *n = root->rb_node; struct rb_node *n = root->rb_node;
struct btrfs_free_space *entry, *ret = NULL; struct btrfs_free_space *entry, *ret = NULL;
@ -84,13 +103,14 @@ static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
entry = rb_entry(n, struct btrfs_free_space, offset_index); entry = rb_entry(n, struct btrfs_free_space, offset_index);
if (offset < entry->offset) { if (offset < entry->offset) {
if (!contains && if (fuzzy &&
(!ret || entry->offset < ret->offset) && (!ret || entry->offset < ret->offset) &&
(bytes <= entry->bytes)) (bytes <= entry->bytes))
ret = entry; ret = entry;
n = n->rb_left; n = n->rb_left;
} else if (offset > entry->offset) { } else if (offset > entry->offset) {
if ((entry->offset + entry->bytes - 1) >= offset && if (fuzzy &&
(entry->offset + entry->bytes - 1) >= offset &&
bytes <= entry->bytes) { bytes <= entry->bytes) {
ret = entry; ret = entry;
break; break;
@ -171,6 +191,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
int ret = 0; int ret = 0;
BUG_ON(!info->bytes);
ret = tree_insert_offset(&block_group->free_space_offset, info->offset, ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
&info->offset_index); &info->offset_index);
if (ret) if (ret)
@ -184,108 +205,70 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
return ret; return ret;
} }
static int __btrfs_add_free_space(struct btrfs_block_group_cache *block_group, int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes) u64 offset, u64 bytes)
{ {
struct btrfs_free_space *right_info; struct btrfs_free_space *right_info;
struct btrfs_free_space *left_info; struct btrfs_free_space *left_info;
struct btrfs_free_space *info = NULL; struct btrfs_free_space *info = NULL;
struct btrfs_free_space *alloc_info;
int ret = 0; int ret = 0;
alloc_info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
if (!alloc_info) if (!info)
return -ENOMEM; return -ENOMEM;
info->offset = offset;
info->bytes = bytes;
spin_lock(&block_group->tree_lock);
/* /*
* first we want to see if there is free space adjacent to the range we * first we want to see if there is free space adjacent to the range we
* are adding, if there is remove that struct and add a new one to * are adding, if there is remove that struct and add a new one to
* cover the entire range * cover the entire range
*/ */
right_info = tree_search_offset(&block_group->free_space_offset, right_info = tree_search_offset(&block_group->free_space_offset,
offset+bytes, 0, 1); offset+bytes, 0, 0);
left_info = tree_search_offset(&block_group->free_space_offset, left_info = tree_search_offset(&block_group->free_space_offset,
offset-1, 0, 1); offset-1, 0, 1);
if (right_info && right_info->offset == offset+bytes) { if (right_info) {
unlink_free_space(block_group, right_info); unlink_free_space(block_group, right_info);
info = right_info; info->bytes += right_info->bytes;
info->offset = offset; kfree(right_info);
info->bytes += bytes;
} else if (right_info && right_info->offset != offset+bytes) {
printk(KERN_ERR "btrfs adding space in the middle of an "
"existing free space area. existing: "
"offset=%llu, bytes=%llu. new: offset=%llu, "
"bytes=%llu\n", (unsigned long long)right_info->offset,
(unsigned long long)right_info->bytes,
(unsigned long long)offset,
(unsigned long long)bytes);
BUG();
} }
if (left_info) { if (left_info && left_info->offset + left_info->bytes == offset) {
unlink_free_space(block_group, left_info); unlink_free_space(block_group, left_info);
info->offset = left_info->offset;
if (unlikely((left_info->offset + left_info->bytes) != info->bytes += left_info->bytes;
offset)) { kfree(left_info);
printk(KERN_ERR "btrfs free space to the left "
"of new free space isn't "
"quite right. existing: offset=%llu, "
"bytes=%llu. new: offset=%llu, bytes=%llu\n",
(unsigned long long)left_info->offset,
(unsigned long long)left_info->bytes,
(unsigned long long)offset,
(unsigned long long)bytes);
BUG();
}
if (info) {
info->offset = left_info->offset;
info->bytes += left_info->bytes;
kfree(left_info);
} else {
info = left_info;
info->bytes += bytes;
}
} }
if (info) {
ret = link_free_space(block_group, info);
if (!ret)
info = NULL;
goto out;
}
info = alloc_info;
alloc_info = NULL;
info->offset = offset;
info->bytes = bytes;
ret = link_free_space(block_group, info); ret = link_free_space(block_group, info);
if (ret) if (ret)
kfree(info); kfree(info);
out:
spin_unlock(&block_group->tree_lock);
if (ret) { if (ret) {
printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret);
if (ret == -EEXIST) BUG_ON(ret == -EEXIST);
BUG();
} }
kfree(alloc_info);
return ret; return ret;
} }
static int int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
__btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, u64 offset, u64 bytes)
u64 offset, u64 bytes)
{ {
struct btrfs_free_space *info; struct btrfs_free_space *info;
int ret = 0; int ret = 0;
spin_lock(&block_group->tree_lock);
info = tree_search_offset(&block_group->free_space_offset, offset, 0, info = tree_search_offset(&block_group->free_space_offset, offset, 0,
1); 1);
if (info && info->offset == offset) { if (info && info->offset == offset) {
if (info->bytes < bytes) { if (info->bytes < bytes) {
printk(KERN_ERR "Found free space at %llu, size %llu," printk(KERN_ERR "Found free space at %llu, size %llu,"
@ -295,12 +278,14 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
(unsigned long long)bytes); (unsigned long long)bytes);
WARN_ON(1); WARN_ON(1);
ret = -EINVAL; ret = -EINVAL;
spin_unlock(&block_group->tree_lock);
goto out; goto out;
} }
unlink_free_space(block_group, info); unlink_free_space(block_group, info);
if (info->bytes == bytes) { if (info->bytes == bytes) {
kfree(info); kfree(info);
spin_unlock(&block_group->tree_lock);
goto out; goto out;
} }
@ -308,6 +293,7 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
info->bytes -= bytes; info->bytes -= bytes;
ret = link_free_space(block_group, info); ret = link_free_space(block_group, info);
spin_unlock(&block_group->tree_lock);
BUG_ON(ret); BUG_ON(ret);
} else if (info && info->offset < offset && } else if (info && info->offset < offset &&
info->offset + info->bytes >= offset + bytes) { info->offset + info->bytes >= offset + bytes) {
@ -333,70 +319,33 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
*/ */
kfree(info); kfree(info);
} }
spin_unlock(&block_group->tree_lock);
/* step two, insert a new info struct to cover anything /* step two, insert a new info struct to cover anything
* before the hole * before the hole
*/ */
ret = __btrfs_add_free_space(block_group, old_start, ret = btrfs_add_free_space(block_group, old_start,
offset - old_start); offset - old_start);
BUG_ON(ret); BUG_ON(ret);
} else { } else {
spin_unlock(&block_group->tree_lock);
if (!info) {
printk(KERN_ERR "couldn't find space %llu to free\n",
(unsigned long long)offset);
printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
block_group->cached, block_group->key.objectid,
block_group->key.offset);
btrfs_dump_free_space(block_group, bytes);
} else if (info) {
printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
"but wanted offset=%llu bytes=%llu\n",
info->offset, info->bytes, offset, bytes);
}
WARN_ON(1); WARN_ON(1);
} }
out: out:
return ret; return ret;
} }
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
int ret;
struct btrfs_free_space *sp;
mutex_lock(&block_group->alloc_mutex);
ret = __btrfs_add_free_space(block_group, offset, bytes);
sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
BUG_ON(!sp);
mutex_unlock(&block_group->alloc_mutex);
return ret;
}
int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
int ret;
struct btrfs_free_space *sp;
ret = __btrfs_add_free_space(block_group, offset, bytes);
sp = tree_search_offset(&block_group->free_space_offset, offset, 0, 1);
BUG_ON(!sp);
return ret;
}
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
int ret = 0;
mutex_lock(&block_group->alloc_mutex);
ret = __btrfs_remove_free_space(block_group, offset, bytes);
mutex_unlock(&block_group->alloc_mutex);
return ret;
}
int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
int ret;
ret = __btrfs_remove_free_space(block_group, offset, bytes);
return ret;
}
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes) u64 bytes)
{ {
@ -408,6 +357,8 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info = rb_entry(n, struct btrfs_free_space, offset_index); info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes) if (info->bytes >= bytes)
count++; count++;
printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset,
info->bytes);
} }
printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
"\n", count); "\n", count);
@ -428,68 +379,337 @@ u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
return ret; return ret;
} }
/*
 * Give every extent held by @cluster back to @block_group's free space
 * cache and detach the cluster from the block group.
 *
 * Nothing is touched when @cluster no longer points at @block_group; in
 * that case someone else already raced in and emptied the cluster.
 *
 * cluster->lock is taken and released here.  The callers in this file
 * invoke this with block_group->tree_lock held, since link_free_space()
 * inserts into the block group's trees.
 *
 * Always returns 0.
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *extent;
	struct rb_node *cur;
	struct rb_node *following;

	spin_lock(&cluster->lock);
	if (cluster->block_group == block_group) {
		cluster->window_start = 0;

		/*
		 * fetch the successor before erasing: the node is moved
		 * out of the cluster tree and into the block group's trees
		 */
		for (cur = rb_first(&cluster->root); cur; cur = following) {
			extent = rb_entry(cur, struct btrfs_free_space,
					  offset_index);
			following = rb_next(cur);
			rb_erase(cur, &cluster->root);
			link_free_space(block_group, extent);
		}

		/* unhook the cluster and drop the block group reference */
		list_del_init(&cluster->block_group_list);
		btrfs_put_block_group(cluster->block_group);
		cluster->block_group = NULL;
		cluster->root.rb_node = NULL;
	}
	spin_unlock(&cluster->lock);
	return 0;
}
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{ {
struct btrfs_free_space *info; struct btrfs_free_space *info;
struct rb_node *node; struct rb_node *node;
struct btrfs_free_cluster *cluster;
struct btrfs_free_cluster *safe;
spin_lock(&block_group->tree_lock);
list_for_each_entry_safe(cluster, safe, &block_group->cluster_list,
block_group_list) {
WARN_ON(cluster->block_group != block_group);
__btrfs_return_cluster_to_free_space(block_group, cluster);
}
mutex_lock(&block_group->alloc_mutex);
while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, bytes_index); info = rb_entry(node, struct btrfs_free_space, bytes_index);
unlink_free_space(block_group, info); unlink_free_space(block_group, info);
kfree(info); kfree(info);
if (need_resched()) { if (need_resched()) {
mutex_unlock(&block_group->alloc_mutex); spin_unlock(&block_group->tree_lock);
cond_resched(); cond_resched();
mutex_lock(&block_group->alloc_mutex); spin_lock(&block_group->tree_lock);
} }
} }
mutex_unlock(&block_group->alloc_mutex); spin_unlock(&block_group->tree_lock);
} }
#if 0 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
static struct btrfs_free_space *btrfs_find_free_space_offset(struct u64 offset, u64 bytes, u64 empty_size)
btrfs_block_group_cache
*block_group, u64 offset,
u64 bytes)
{ {
struct btrfs_free_space *ret; struct btrfs_free_space *entry = NULL;
u64 ret = 0;
mutex_lock(&block_group->alloc_mutex); spin_lock(&block_group->tree_lock);
ret = tree_search_offset(&block_group->free_space_offset, offset, entry = tree_search_offset(&block_group->free_space_offset, offset,
bytes, 0); bytes + empty_size, 1);
mutex_unlock(&block_group->alloc_mutex); if (!entry)
entry = tree_search_bytes(&block_group->free_space_bytes,
offset, bytes + empty_size);
if (entry) {
unlink_free_space(block_group, entry);
ret = entry->offset;
entry->offset += bytes;
entry->bytes -= bytes;
if (!entry->bytes)
kfree(entry);
else
link_free_space(block_group, entry);
}
spin_unlock(&block_group->tree_lock);
return ret; return ret;
} }
static struct btrfs_free_space *btrfs_find_free_space_bytes(struct /*
btrfs_block_group_cache * given a cluster, put all of its extents back into the free space
*block_group, u64 offset, * cache. If a block group is passed, this function will only free
u64 bytes) * a cluster that belongs to the passed block group.
*
* Otherwise, it'll get a reference on the block group pointed to by the
* cluster and remove the cluster from it.
*/
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster)
{ {
struct btrfs_free_space *ret; int ret;
mutex_lock(&block_group->alloc_mutex); /* first, get a safe pointer to the block group */
spin_lock(&cluster->lock);
if (!block_group) {
block_group = cluster->block_group;
if (!block_group) {
spin_unlock(&cluster->lock);
return 0;
}
} else if (cluster->block_group != block_group) {
/* someone else has already freed it don't redo their work */
spin_unlock(&cluster->lock);
return 0;
}
atomic_inc(&block_group->count);
spin_unlock(&cluster->lock);
ret = tree_search_bytes(&block_group->free_space_bytes, offset, bytes); /* now return any extents the cluster had on it */
mutex_unlock(&block_group->alloc_mutex); spin_lock(&block_group->tree_lock);
ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
spin_unlock(&block_group->tree_lock);
/* finally drop our ref */
btrfs_put_block_group(block_group);
return ret;
}
/*
 * Try to carve 'bytes' out of the extents held by @cluster.
 *
 * The cluster is scanned in offset order under cluster->lock; the first
 * extent that is at least 'bytes' long and starts at or after @min_start
 * is used.  The allocation is taken from the front of that extent, and
 * the extent is freed once it has been fully consumed.
 *
 * Returns the logical disk offset of the allocation, or 0 when the
 * request is larger than cluster->max_size, the cluster does not belong
 * to @block_group, or no extent qualifies.
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *candidate;
	struct rb_node *cur;
	u64 found = 0;

	spin_lock(&cluster->lock);
	if (bytes <= cluster->max_size &&
	    cluster->block_group == block_group) {
		for (cur = rb_first(&cluster->root); cur; cur = rb_next(cur)) {
			candidate = rb_entry(cur, struct btrfs_free_space,
					     offset_index);

			/* too small, or before the caller's lower bound */
			if (candidate->bytes < bytes ||
			    candidate->offset < min_start)
				continue;

			/* take the allocation from the front of the extent */
			found = candidate->offset;
			candidate->offset += bytes;
			candidate->bytes -= bytes;

			if (candidate->bytes == 0) {
				rb_erase(cur, &cluster->root);
				kfree(candidate);
			}
			break;
		}
	}
	spin_unlock(&cluster->lock);
	return found;
}
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * @trans:       running transaction; only consulted to see whether the
 *               delayed refs are being flushed (larger minimum extent)
 * @block_group: block group whose free space cache is searched
 * @cluster:     cluster to fill in; left untouched if it already points
 *               at a block group
 * @offset:      search hint passed to the by-size free space lookup
 * @bytes:       size of the allocation the caller needs right now
 * @empty_size:  extra space the caller would like the cluster to cover
 *
 * Takes block_group->tree_lock and then cluster->lock.  On success the
 * selected extents are moved from the free space cache into the cluster
 * rbtree, the cluster is linked on block_group->cluster_list and a
 * reference is taken on block_group->count.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	int total_retries = 0;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	/* never ask for a first extent larger than the whole window */
	min_bytes = min(min_bytes, bytes + empty_size);
	entry = tree_search_bytes(&block_group->free_space_bytes,
				  offset, min_bytes);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, lets fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			/*
			 * the new window starts with this extent, so it is
			 * also the largest one seen so far.  Resetting to
			 * zero here could publish max_size == 0 and make the
			 * cluster reject every allocation.
			 */
			max_extent = entry->bytes;

			total_retries++;
			if (total_retries % 256 == 0) {
				if (min_bytes >= (bytes + empty_size)) {
					ret = -ENOSPC;
					goto out;
				}
				/*
				 * grow our allocation a bit, we're not having
				 * much luck
				 */
				min_bytes *= 2;
				goto again;
			}
		} else {
			last = next;
			window_free += next->bytes;
			/* track the largest extent that joined the window */
			if (next->bytes > max_extent)
				max_extent = next->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		/* grab the successor first, unlinking invalidates the node */
		node = rb_next(&entry->offset_index);
		unlink_free_space(block_group, entry);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}
	ret = 0;
	cluster->max_size = max_extent;
	/* the cluster holds a reference on its block group */
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
#endif
struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache /*
*block_group, u64 offset, * simple code to zero out a cluster
u64 bytes) */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{ {
struct btrfs_free_space *ret = NULL; spin_lock_init(&cluster->lock);
spin_lock_init(&cluster->refill_lock);
ret = tree_search_offset(&block_group->free_space_offset, offset, cluster->root.rb_node = NULL;
bytes, 0); cluster->max_size = 0;
if (!ret) INIT_LIST_HEAD(&cluster->block_group_list);
ret = tree_search_bytes(&block_group->free_space_bytes, cluster->block_group = NULL;
offset, bytes);
return ret;
} }

View file

@ -0,0 +1,44 @@
/*
* Copyright (C) 2009 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
 * Declarations for the per block group free space cache and for the
 * allocation clusters that are carved out of it.  Only prototypes live
 * here; the structures they take are declared elsewhere.
 */
#ifndef __BTRFS_FREE_SPACE_CACHE
#define __BTRFS_FREE_SPACE_CACHE
/* add or remove a [bytenr, bytenr + size) range -- NOTE(review): semantics inferred from the names, confirm against the implementation */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
/* allocate 'bytes' directly from the cache; returns a u64 offset */
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
/*
 * fill 'cluster' with extents from 'block_group' covering at least 'bytes'
 * and up to 'bytes + empty_size'; returns 0 on success or -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
/* initialize a cluster so that it is empty and attached to no block group */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
/*
 * allocate 'bytes' at or after 'min_start' from a cluster; returns the
 * logical offset, or 0 if the cluster could not satisfy the request
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start);
/*
 * give a cluster's extents back to the free space cache; with a NULL
 * 'block_group' the block group recorded in the cluster is used
 */
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);
#endif

View file

@ -3481,8 +3481,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (dir) { if (dir) {
ret = btrfs_set_inode_index(dir, index); ret = btrfs_set_inode_index(dir, index);
if (ret) if (ret) {
iput(inode);
return ERR_PTR(ret); return ERR_PTR(ret);
}
} }
/* /*
* index_cnt is ignored for everything but a dir, * index_cnt is ignored for everything but a dir,
@ -3565,6 +3567,7 @@ fail:
if (dir) if (dir)
BTRFS_I(dir)->index_cnt--; BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path); btrfs_free_path(path);
iput(inode);
return ERR_PTR(ret); return ERR_PTR(ret);
} }

View file

@ -60,8 +60,8 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb)
/* /*
* unfortunately, many of the places that currently set a lock to blocking * unfortunately, many of the places that currently set a lock to blocking
* don't end up blocking for every long, and often they don't block * don't end up blocking for very long, and often they don't block
* at all. For a dbench 50 run, if we don't spin one the blocking bit * at all. For a dbench 50 run, if we don't spin on the blocking bit
* at all, the context switch rate can jump up to 400,000/sec or more. * at all, the context switch rate can jump up to 400,000/sec or more.
* *
* So, we're still stuck with this crummy spin on the blocking bit, * So, we're still stuck with this crummy spin on the blocking bit,

View file

@ -24,6 +24,7 @@
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
@ -66,7 +67,8 @@ static void btrfs_put_super(struct super_block *sb)
enum { enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_err, Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
Opt_flushoncommit, Opt_err,
}; };
static match_table_t tokens = { static match_table_t tokens = {
@ -83,6 +85,8 @@ static match_table_t tokens = {
{Opt_compress, "compress"}, {Opt_compress, "compress"},
{Opt_ssd, "ssd"}, {Opt_ssd, "ssd"},
{Opt_noacl, "noacl"}, {Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_err, NULL}, {Opt_err, NULL},
}; };
@ -222,6 +226,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_noacl: case Opt_noacl:
root->fs_info->sb->s_flags &= ~MS_POSIXACL; root->fs_info->sb->s_flags &= ~MS_POSIXACL;
break; break;
case Opt_notreelog:
printk(KERN_INFO "btrfs: disabling tree log\n");
btrfs_set_opt(info->mount_opt, NOTREELOG);
break;
case Opt_flushoncommit:
printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
break;
default: default:
break; break;
} }
@ -363,9 +375,8 @@ fail_close:
int btrfs_sync_fs(struct super_block *sb, int wait) int btrfs_sync_fs(struct super_block *sb, int wait)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_root *root; struct btrfs_root *root = btrfs_sb(sb);
int ret; int ret;
root = btrfs_sb(sb);
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
return 0; return 0;
@ -385,6 +396,41 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return ret; return ret;
} }
/*
 * Write the active btrfs mount options for a mount into 'seq'.
 *
 * Each option is emitted as ",name" or ",name=value".  Flag options are
 * printed when their bit is set; valued options are printed only when
 * they differ from the value this function treats as the default.  The
 * names must match the strings in the mount option table so that the
 * output can be passed back in as mount options.
 *
 * Always returns 0.
 */
static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
	struct btrfs_fs_info *info = root->fs_info;

	if (btrfs_test_opt(root, DEGRADED))
		seq_puts(seq, ",degraded");
	if (btrfs_test_opt(root, NODATASUM))
		seq_puts(seq, ",nodatasum");
	if (btrfs_test_opt(root, NODATACOW))
		seq_puts(seq, ",nodatacow");
	if (btrfs_test_opt(root, NOBARRIER))
		seq_puts(seq, ",nobarrier");

	/*
	 * cast explicitly so the argument always matches %llu, however the
	 * field's integer type happens to be defined on this architecture
	 */
	if (info->max_extent != (u64)-1)
		seq_printf(seq, ",max_extent=%llu",
			   (unsigned long long)info->max_extent);
	if (info->max_inline != 8192 * 1024)
		seq_printf(seq, ",max_inline=%llu",
			   (unsigned long long)info->max_inline);
	if (info->alloc_start != 0)
		seq_printf(seq, ",alloc_start=%llu",
			   (unsigned long long)info->alloc_start);

	/* only shown when it differs from min(online cpus + 2, 8) */
	if (info->thread_pool_size != min_t(unsigned long,
					    num_online_cpus() + 2, 8))
		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);

	if (btrfs_test_opt(root, COMPRESS))
		seq_puts(seq, ",compress");
	if (btrfs_test_opt(root, SSD))
		seq_puts(seq, ",ssd");

	/* spelled exactly like the option table entries */
	if (btrfs_test_opt(root, NOTREELOG))
		seq_puts(seq, ",notreelog");
	if (btrfs_test_opt(root, FLUSHONCOMMIT))
		seq_puts(seq, ",flushoncommit");

	if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
		seq_puts(seq, ",noacl");
	return 0;
}
static void btrfs_write_super(struct super_block *sb) static void btrfs_write_super(struct super_block *sb)
{ {
sb->s_dirt = 0; sb->s_dirt = 0;
@ -630,7 +676,7 @@ static struct super_operations btrfs_super_ops = {
.put_super = btrfs_put_super, .put_super = btrfs_put_super,
.write_super = btrfs_write_super, .write_super = btrfs_write_super,
.sync_fs = btrfs_sync_fs, .sync_fs = btrfs_sync_fs,
.show_options = generic_show_options, .show_options = btrfs_show_options,
.write_inode = btrfs_write_inode, .write_inode = btrfs_write_inode,
.dirty_inode = btrfs_dirty_inode, .dirty_inode = btrfs_dirty_inode,
.alloc_inode = btrfs_alloc_inode, .alloc_inode = btrfs_alloc_inode,

View file

@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root)
GFP_NOFS); GFP_NOFS);
BUG_ON(!cur_trans); BUG_ON(!cur_trans);
root->fs_info->generation++; root->fs_info->generation++;
root->fs_info->last_alloc = 0;
root->fs_info->last_data_alloc = 0;
cur_trans->num_writers = 1; cur_trans->num_writers = 1;
cur_trans->num_joined = 0; cur_trans->num_joined = 0;
cur_trans->transid = root->fs_info->generation; cur_trans->transid = root->fs_info->generation;
@ -974,6 +972,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
int ret; int ret;
int should_grow = 0; int should_grow = 0;
unsigned long now = get_seconds(); unsigned long now = get_seconds();
int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
btrfs_run_ordered_operations(root, 0); btrfs_run_ordered_operations(root, 0);
@ -1053,7 +1052,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
if (snap_pending) { if (flush_on_commit || snap_pending) {
if (flush_on_commit)
btrfs_start_delalloc_inodes(root);
ret = btrfs_wait_ordered_extents(root, 1); ret = btrfs_wait_ordered_extents(root, 1);
BUG_ON(ret); BUG_ON(ret);
} }

View file

@ -262,11 +262,9 @@ static int process_one_buffer(struct btrfs_root *log,
struct extent_buffer *eb, struct extent_buffer *eb,
struct walk_control *wc, u64 gen) struct walk_control *wc, u64 gen)
{ {
if (wc->pin) { if (wc->pin)
mutex_lock(&log->fs_info->pinned_mutex);
btrfs_update_pinned_extents(log->fs_info->extent_root, btrfs_update_pinned_extents(log->fs_info->extent_root,
eb->start, eb->len, 1); eb->start, eb->len, 1);
}
if (btrfs_buffer_uptodate(eb, gen)) { if (btrfs_buffer_uptodate(eb, gen)) {
if (wc->write) if (wc->write)
@ -1224,8 +1222,7 @@ insert:
ret = insert_one_name(trans, root, path, key->objectid, key->offset, ret = insert_one_name(trans, root, path, key->objectid, key->offset,
name, name_len, log_type, &log_key); name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT) BUG_ON(ret && ret != -ENOENT);
BUG();
goto out; goto out;
} }
@ -2900,6 +2897,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
sb = inode->i_sb; sb = inode->i_sb;
if (btrfs_test_opt(root, NOTREELOG)) {
ret = 1;
goto end_no_trans;
}
if (root->fs_info->last_trans_log_full_commit > if (root->fs_info->last_trans_log_full_commit >
root->fs_info->last_trans_committed) { root->fs_info->last_trans_committed) {
ret = 1; ret = 1;

View file

@ -20,6 +20,7 @@
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/iocontext.h>
#include <asm/div64.h> #include <asm/div64.h>
#include "compat.h" #include "compat.h"
#include "ctree.h" #include "ctree.h"
@ -145,8 +146,9 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
int again = 0; int again = 0;
unsigned long num_run = 0; unsigned long num_run = 0;
unsigned long limit; unsigned long limit;
unsigned long last_waited = 0;
bdi = device->bdev->bd_inode->i_mapping->backing_dev_info; bdi = blk_get_backing_dev_info(device->bdev);
fs_info = device->dev_root->fs_info; fs_info = device->dev_root->fs_info;
limit = btrfs_async_submit_limit(fs_info); limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3; limit = limit * 2 / 3;
@ -207,7 +209,32 @@ loop_lock:
if (pending && bdi_write_congested(bdi) && num_run > 16 && if (pending && bdi_write_congested(bdi) && num_run > 16 &&
fs_info->fs_devices->open_devices > 1) { fs_info->fs_devices->open_devices > 1) {
struct bio *old_head; struct bio *old_head;
struct io_context *ioc;
ioc = current->io_context;
/*
* the main goal here is that we don't want to
* block if we're going to be able to submit
* more requests without blocking.
*
* This code does two great things, it pokes into
* the elevator code from a filesystem _and_
* it makes assumptions about how batching works.
*/
if (ioc && ioc->nr_batch_requests > 0 &&
time_before(jiffies, ioc->last_waited + HZ/50UL) &&
(last_waited == 0 ||
ioc->last_waited == last_waited)) {
/*
* we want to go through our batch of
* requests and stop. So, we copy out
* the ioc->last_waited time and test
* against it before looping
*/
last_waited = ioc->last_waited;
continue;
}
spin_lock(&device->io_lock); spin_lock(&device->io_lock);
old_head = device->pending_bios; old_head = device->pending_bios;
@ -231,6 +258,18 @@ loop_lock:
if (device->pending_bios) if (device->pending_bios)
goto loop_lock; goto loop_lock;
spin_unlock(&device->io_lock); spin_unlock(&device->io_lock);
/*
* IO has already been through a long path to get here. Checksumming,
* async helper threads, perhaps compression. We've done a pretty
* good job of collecting a batch of IO and should just unplug
* the device right away.
*
* This will help anyone who is waiting on the IO, they might have
* already unplugged, but managed to do so before the bio they
* cared about found its way down here.
*/
blk_run_backing_dev(bdi, NULL);
done: done:
return 0; return 0;
} }

View file

@ -76,7 +76,7 @@ struct btrfs_device {
struct btrfs_fs_devices { struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
/* the device with this id has the most recent coyp of the super */ /* the device with this id has the most recent copy of the super */
u64 latest_devid; u64 latest_devid;
u64 latest_trans; u64 latest_trans;
u64 num_devices; u64 num_devices;