1
0
Fork 0

Merge branch 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This has fixes and cleanups Dave Sterba collected for the merge
  window.

  The biggest functional fixes are between btrfs raid5/6 and scrub, and
  raid5/6 and device replacement. Some of our pending qgroup fixes are
  included as well while I bash on the rest in testing.

  We also have the usual set of cleanups, including one that makes
  __btrfs_map_block() much more maintainable, and conversions from
  atomic_t to refcount_t"

* 'for-linus-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (71 commits)
  btrfs: fix the gfp_mask for the reada_zones radix tree
  Btrfs: fix reported number of inode blocks
  Btrfs: send, fix file hole not being preserved due to inline extent
  Btrfs: fix extent map leak during fallocate error path
  Btrfs: fix incorrect space accounting after failure to insert inline extent
  Btrfs: fix invalid attempt to free reserved space on failure to cow range
  btrfs: Handle delalloc error correctly to avoid ordered extent hang
  btrfs: Fix metadata underflow caused by btrfs_reloc_clone_csum error
  btrfs: check if the device is flush capable
  btrfs: delete unused member nobarriers
  btrfs: scrub: Fix RAID56 recovery race condition
  btrfs: scrub: Introduce full stripe lock for RAID56
  btrfs: Use ktime_get_real_ts for root ctime
  Btrfs: handle only applicable errors returned by btrfs_get_extent
  btrfs: qgroup: Fix qgroup corruption caused by inode_cache mount option
  btrfs: use q which is already obtained from bdev_get_queue
  Btrfs: switch to div64_u64 if with a u64 divisor
  Btrfs: update scrub_parity to use u64 stripe_len
  Btrfs: enable repair during read for raid56 profile
  btrfs: use clear_page where appropriate
  ...
hifive-unleashed-5.1
Linus Torvalds 2017-05-10 08:33:17 -07:00
commit 1176032cb1
40 changed files with 1629 additions and 834 deletions

View File

@ -2926,6 +2926,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
S: Maintained
F: Documentation/filesystems/btrfs.txt
F: fs/btrfs/
F: include/linux/btrfs*
F: include/uapi/linux/btrfs*
BTTV VIDEO4LINUX DRIVER
M: Mauro Carvalho Chehab <mchehab@s-opensource.com>

View File

@ -26,6 +26,11 @@
#include "delayed-ref.h"
#include "locking.h"
enum merge_mode {
MERGE_IDENTICAL_KEYS = 1,
MERGE_IDENTICAL_PARENTS,
};
/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6
@ -533,7 +538,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
* slot==nritems. In that case, go to the next leaf before we continue.
*/
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
if (time_seq == (u64)-1)
if (time_seq == SEQ_LAST)
ret = btrfs_next_leaf(root, path);
else
ret = btrfs_next_old_leaf(root, path, time_seq);
@ -577,7 +582,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
eie = NULL;
}
next:
if (time_seq == (u64)-1)
if (time_seq == SEQ_LAST)
ret = btrfs_next_item(root, path);
else
ret = btrfs_next_old_item(root, path, time_seq);
@ -629,7 +634,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
else if (time_seq == (u64)-1)
else if (time_seq == SEQ_LAST)
root_level = btrfs_header_level(root->node);
else
root_level = btrfs_old_root_level(root, time_seq);
@ -640,7 +645,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
}
path->lowest_level = level;
if (time_seq == (u64)-1)
if (time_seq == SEQ_LAST)
ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
0, 0);
else
@ -809,14 +814,12 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
/*
* merge backrefs and adjust counts accordingly
*
* mode = 1: merge identical keys, if key is set
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
* additionally, we could even add a key range for the blocks we
* looked into to merge even more (-> replace unresolved refs by those
* having a parent).
* mode = 2: merge identical parents
* FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref
* then we can merge more here. Additionally, we could even add a key
* range for the blocks we looked into to merge even more (-> replace
* unresolved refs by those having a parent).
*/
static void __merge_refs(struct list_head *head, int mode)
static void __merge_refs(struct list_head *head, enum merge_mode mode)
{
struct __prelim_ref *pos1;
@ -829,7 +832,7 @@ static void __merge_refs(struct list_head *head, int mode)
if (!ref_for_same_block(ref1, ref2))
continue;
if (mode == 1) {
if (mode == MERGE_IDENTICAL_KEYS) {
if (!ref1->parent && ref2->parent)
swap(ref1, ref2);
} else {
@ -1196,7 +1199,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*
* NOTE: This can return values > 0
*
* If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
* If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
* much like trans == NULL case, the difference only lies in it will not
* commit root.
* The special case is for qgroup to search roots in commit_transaction().
@ -1243,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
path->skip_locking = 1;
}
if (time_seq == (u64)-1)
if (time_seq == SEQ_LAST)
path->skip_locking = 1;
/*
@ -1273,9 +1276,9 @@ again:
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (trans && likely(trans->type != __TRANS_DUMMY) &&
time_seq != (u64)-1) {
time_seq != SEQ_LAST) {
#else
if (trans && time_seq != (u64)-1) {
if (trans && time_seq != SEQ_LAST) {
#endif
/*
* look if there are updates for this ref queued and lock the
@ -1286,7 +1289,7 @@ again:
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
refcount_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
@ -1374,7 +1377,7 @@ again:
if (ret)
goto out;
__merge_refs(&prefs, 1);
__merge_refs(&prefs, MERGE_IDENTICAL_KEYS);
ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
extent_item_pos, total_refs,
@ -1382,7 +1385,7 @@ again:
if (ret)
goto out;
__merge_refs(&prefs, 2);
__merge_refs(&prefs, MERGE_IDENTICAL_PARENTS);
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);

View File

@ -124,6 +124,13 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;
/*
* Total number of bytes pending delalloc that fall within a file
* range that is either a hole or beyond EOF (and no prealloc extent
* exists in the range). This is always <= delalloc_bytes.
*/
u64 new_delalloc_bytes;
/*
* total number of bytes pending defrag, used by stat to check whether
* it needs COW.

View File

@ -44,7 +44,7 @@
struct compressed_bio {
/* number of bios pending for this compressed extent */
atomic_t pending_bios;
refcount_t pending_bios;
/* the pages with the compressed data on them */
struct page **compressed_pages;
@ -161,7 +161,7 @@ static void end_compressed_bio_read(struct bio *bio)
/* if there are more bios still pending for this compressed
* extent, just exit
*/
if (!atomic_dec_and_test(&cb->pending_bios))
if (!refcount_dec_and_test(&cb->pending_bios))
goto out;
inode = cb->inode;
@ -274,7 +274,7 @@ static void end_compressed_bio_write(struct bio *bio)
/* if there are more bios still pending for this compressed
* extent, just exit
*/
if (!atomic_dec_and_test(&cb->pending_bios))
if (!refcount_dec_and_test(&cb->pending_bios))
goto out;
/* ok, we're the last bio for this extent, step one is to
@ -342,7 +342,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
return -ENOMEM;
atomic_set(&cb->pending_bios, 0);
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->start = start;
@ -363,7 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
atomic_inc(&cb->pending_bios);
refcount_set(&cb->pending_bios, 1);
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
@ -388,7 +388,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
* we inc the count. Otherwise, the cb might get
* freed before we're done setting it up
*/
atomic_inc(&cb->pending_bios);
refcount_inc(&cb->pending_bios);
ret = btrfs_bio_wq_end_io(fs_info, bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@ -607,7 +607,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!cb)
goto out;
atomic_set(&cb->pending_bios, 0);
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->mirror_num = mirror_num;
@ -656,7 +656,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
page = cb->compressed_pages[pg_index];
@ -685,7 +685,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
* we inc the count. Otherwise, the cb might get
* freed before we're done setting it up
*/
atomic_inc(&cb->pending_bios);
refcount_inc(&cb->pending_bios);
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(inode, comp_bio,

View File

@ -567,7 +567,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
static noinline int
tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
int nr_items)
{
struct tree_mod_elem *tm = NULL;
struct tree_mod_elem **tm_list = NULL;
@ -578,11 +578,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
if (!tree_mod_need_log(fs_info, eb))
return 0;
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
if (!tm_list)
return -ENOMEM;
tm = kzalloc(sizeof(*tm), flags);
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm) {
ret = -ENOMEM;
goto free_tms;
@ -596,7 +596,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
if (!tm_list[i]) {
ret = -ENOMEM;
goto free_tms;
@ -663,7 +663,7 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
struct extent_buffer *new_root, gfp_t flags,
struct extent_buffer *new_root,
int log_removal)
{
struct tree_mod_elem *tm = NULL;
@ -678,14 +678,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (log_removal && btrfs_header_level(old_root) > 0) {
nritems = btrfs_header_nritems(old_root);
tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
flags);
GFP_NOFS);
if (!tm_list) {
ret = -ENOMEM;
goto free_tms;
}
for (i = 0; i < nritems; i++) {
tm_list[i] = alloc_tree_mod_elem(old_root, i,
MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
if (!tm_list[i]) {
ret = -ENOMEM;
goto free_tms;
@ -693,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
}
}
tm = kzalloc(sizeof(*tm), flags);
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm) {
ret = -ENOMEM;
goto free_tms;
@ -873,7 +873,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
{
int ret;
ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
nr_items, GFP_NOFS);
nr_items);
BUG_ON(ret < 0);
}
@ -943,7 +943,7 @@ tree_mod_log_set_root_pointer(struct btrfs_root *root,
{
int ret;
ret = tree_mod_log_insert_root(root->fs_info, root->node,
new_root_node, GFP_NOFS, log_removal);
new_root_node, log_removal);
BUG_ON(ret < 0);
}

View File

@ -39,6 +39,7 @@
#include <linux/security.h>
#include <linux/sizes.h>
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@ -518,7 +519,7 @@ struct btrfs_caching_control {
struct btrfs_work work;
struct btrfs_block_group_cache *block_group;
u64 progress;
atomic_t count;
refcount_t count;
};
/* Once caching_thread() finds this much free space, it will wake up waiters. */
@ -538,6 +539,14 @@ struct btrfs_io_ctl {
unsigned check_crcs:1;
};
/*
* Tree to record all locked full stripes of a RAID5/6 block group
*/
struct btrfs_full_stripe_locks_tree {
struct rb_root root;
struct mutex lock;
};
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
@ -648,6 +657,9 @@ struct btrfs_block_group_cache {
* Protected by free_space_lock.
*/
int needs_free_space;
/* Record locked full stripes for RAID5/6 block group */
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
};
/* delayed seq elem */
@ -658,6 +670,8 @@ struct seq_list {
#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
#define SEQ_LAST ((u64)-1)
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
@ -702,6 +716,11 @@ struct btrfs_delayed_root;
#define BTRFS_FS_BTREE_ERR 11
#define BTRFS_FS_LOG1_ERR 12
#define BTRFS_FS_LOG2_ERR 13
/*
* Indicate that a whole-filesystem exclusive operation is running
* (device replace, resize, device add/delete, balance)
*/
#define BTRFS_FS_EXCL_OP 14
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
@ -1066,8 +1085,6 @@ struct btrfs_fs_info {
/* device replace state */
struct btrfs_dev_replace dev_replace;
atomic_t mutually_exclusive_operation_running;
struct percpu_counter bio_counter;
wait_queue_head_t replace_wait;
@ -1220,7 +1237,7 @@ struct btrfs_root {
dev_t anon_dev;
spinlock_t root_item_lock;
atomic_t refs;
refcount_t refs;
struct mutex delalloc_mutex;
spinlock_t delalloc_lock;
@ -3646,6 +3663,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
static inline void btrfs_init_full_stripe_locks_tree(
struct btrfs_full_stripe_locks_tree *locks_root)
{
locks_root->root = RB_ROOT;
mutex_init(&locks_root->lock);
}
/* dev-replace.c */
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
@ -3670,8 +3693,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *start, struct btrfs_key *end);
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int err);
int btree_readahead_hook(struct extent_buffer *eb, int err);
static inline int is_fstree(u64 rootid)
{

View File

@ -52,7 +52,7 @@ static inline void btrfs_init_delayed_node(
{
delayed_node->root = root;
delayed_node->inode_id = inode_id;
atomic_set(&delayed_node->refs, 0);
refcount_set(&delayed_node->refs, 0);
delayed_node->ins_root = RB_ROOT;
delayed_node->del_root = RB_ROOT;
mutex_init(&delayed_node->mutex);
@ -81,7 +81,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
node = READ_ONCE(btrfs_inode->delayed_node);
if (node) {
atomic_inc(&node->refs);
refcount_inc(&node->refs);
return node;
}
@ -89,14 +89,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
atomic_inc(&node->refs); /* can be accessed */
refcount_inc(&node->refs); /* can be accessed */
BUG_ON(btrfs_inode->delayed_node != node);
spin_unlock(&root->inode_lock);
return node;
}
btrfs_inode->delayed_node = node;
/* can be accessed and cached in the inode */
atomic_add(2, &node->refs);
refcount_add(2, &node->refs);
spin_unlock(&root->inode_lock);
return node;
}
@ -125,7 +125,7 @@ again:
btrfs_init_delayed_node(node, root, ino);
/* cached in the btrfs inode and can be accessed */
atomic_add(2, &node->refs);
refcount_set(&node->refs, 2);
ret = radix_tree_preload(GFP_NOFS);
if (ret) {
@ -166,7 +166,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
} else {
list_add_tail(&node->n_list, &root->node_list);
list_add_tail(&node->p_list, &root->prepare_list);
atomic_inc(&node->refs); /* inserted into list */
refcount_inc(&node->refs); /* inserted into list */
root->nodes++;
set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
}
@ -180,7 +180,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
spin_lock(&root->lock);
if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
root->nodes--;
atomic_dec(&node->refs); /* not in the list */
refcount_dec(&node->refs); /* not in the list */
list_del_init(&node->n_list);
if (!list_empty(&node->p_list))
list_del_init(&node->p_list);
@ -201,7 +201,7 @@ static struct btrfs_delayed_node *btrfs_first_delayed_node(
p = delayed_root->node_list.next;
node = list_entry(p, struct btrfs_delayed_node, n_list);
atomic_inc(&node->refs);
refcount_inc(&node->refs);
out:
spin_unlock(&delayed_root->lock);
@ -228,7 +228,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
p = node->n_list.next;
next = list_entry(p, struct btrfs_delayed_node, n_list);
atomic_inc(&next->refs);
refcount_inc(&next->refs);
out:
spin_unlock(&delayed_root->lock);
@ -253,11 +253,11 @@ static void __btrfs_release_delayed_node(
btrfs_dequeue_delayed_node(delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
if (atomic_dec_and_test(&delayed_node->refs)) {
if (refcount_dec_and_test(&delayed_node->refs)) {
bool free = false;
struct btrfs_root *root = delayed_node->root;
spin_lock(&root->inode_lock);
if (atomic_read(&delayed_node->refs) == 0) {
if (refcount_read(&delayed_node->refs) == 0) {
radix_tree_delete(&root->delayed_nodes_tree,
delayed_node->inode_id);
free = true;
@ -286,7 +286,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
p = delayed_root->prepare_list.next;
list_del_init(p);
node = list_entry(p, struct btrfs_delayed_node, p_list);
atomic_inc(&node->refs);
refcount_inc(&node->refs);
out:
spin_unlock(&delayed_root->lock);
@ -308,7 +308,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->ins_or_del = 0;
item->bytes_reserved = 0;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
refcount_set(&item->refs, 1);
}
return item;
}
@ -483,7 +483,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
{
if (item) {
__btrfs_remove_delayed_item(item);
if (atomic_dec_and_test(&item->refs))
if (refcount_dec_and_test(&item->refs))
kfree(item);
}
}
@ -1600,14 +1600,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
while (item) {
atomic_inc(&item->refs);
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, ins_list);
item = __btrfs_next_delayed_item(item);
}
item = __btrfs_first_delayed_deletion_item(delayed_node);
while (item) {
atomic_inc(&item->refs);
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, del_list);
item = __btrfs_next_delayed_item(item);
}
@ -1621,7 +1621,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
* insert/delete delayed items in this period. So we also needn't
* requeue or dequeue this delayed node.
*/
atomic_dec(&delayed_node->refs);
refcount_dec(&delayed_node->refs);
return true;
}
@ -1634,13 +1634,13 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
list_del(&curr->readdir_list);
if (atomic_dec_and_test(&curr->refs))
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
}
list_for_each_entry_safe(curr, next, del_list, readdir_list) {
list_del(&curr->readdir_list);
if (atomic_dec_and_test(&curr->refs))
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
}
@ -1667,7 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
list_del(&curr->readdir_list);
ret = (curr->key.offset == index);
if (atomic_dec_and_test(&curr->refs))
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
if (ret)
@ -1705,7 +1705,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
list_del(&curr->readdir_list);
if (curr->key.offset < ctx->pos) {
if (atomic_dec_and_test(&curr->refs))
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
continue;
}
@ -1722,7 +1722,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
over = !dir_emit(ctx, name, name_len,
location.objectid, d_type);
if (atomic_dec_and_test(&curr->refs))
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
if (over)
@ -1963,7 +1963,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
inode_id = delayed_nodes[n - 1]->inode_id + 1;
for (i = 0; i < n; i++)
atomic_inc(&delayed_nodes[i]->refs);
refcount_inc(&delayed_nodes[i]->refs);
spin_unlock(&root->inode_lock);
for (i = 0; i < n; i++) {

View File

@ -26,7 +26,7 @@
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#include "ctree.h"
/* types of the delayed item */
@ -67,7 +67,7 @@ struct btrfs_delayed_node {
struct rb_root del_root;
struct mutex mutex;
struct btrfs_inode_item inode_item;
atomic_t refs;
refcount_t refs;
u64 index_cnt;
unsigned long flags;
int count;
@ -80,7 +80,7 @@ struct btrfs_delayed_item {
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
refcount_t refs;
int ins_or_del;
u32 data_len;
char data[0];

View File

@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
if (mutex_trylock(&head->mutex))
return 0;
atomic_inc(&head->node.refs);
refcount_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = count_mod;
@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
seq = atomic64_read(&fs_info->tree_mod_seq);
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = 1;

View File

@ -18,6 +18,8 @@
#ifndef __DELAYED_REF__
#define __DELAYED_REF__
#include <linux/refcount.h>
/* these are the possible values of struct btrfs_delayed_ref_node->action */
#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node {
u64 seq;
/* ref count on this data structure */
atomic_t refs;
refcount_t refs;
/*
* how many refs is this entry adding or deleting. For
@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(atomic_read(&ref->refs) == 0);
if (atomic_dec_and_test(&ref->refs)) {
WARN_ON(refcount_read(&ref->refs) == 0);
if (refcount_dec_and_test(&ref->refs)) {
WARN_ON(ref->in_tree);
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:

View File

@ -546,8 +546,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
btrfs_rm_dev_replace_unblocked(fs_info);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return scrub_ret;
@ -665,7 +667,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
srcdev = dev_replace->srcdev;
args->status.progress_1000 = div_u64(dev_replace->cursor_left,
args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
@ -784,8 +786,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
}
btrfs_dev_replace_unlock(dev_replace, 1);
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
return PTR_ERR_OR_ZERO(task);
}
@ -814,7 +815,7 @@ static int btrfs_dev_replace_kthread(void *data)
(unsigned int)progress);
}
btrfs_dev_replace_continue_on_mount(fs_info);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return 0;
}

View File

@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(fs_info, eb, ret);
btree_readahead_hook(eb, ret);
if (ret) {
/*
@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(eb->fs_info, eb, -EIO);
btree_readahead_hook(eb, -EIO);
return -EIO; /* we fixed nothing */
}
@ -1340,7 +1340,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
atomic64_set(&root->qgroup_meta_rsv, 0);
root->log_transid = 0;
@ -3497,10 +3497,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
*/
static int write_dev_flush(struct btrfs_device *device, int wait)
{
struct request_queue *q = bdev_get_queue(device->bdev);
struct bio *bio;
int ret = 0;
if (device->nobarriers)
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
return 0;
if (wait) {
@ -4321,7 +4322,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
head = rb_entry(node, struct btrfs_delayed_ref_head,
href_node);
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
refcount_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
@ -4593,7 +4594,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
t = list_first_entry(&fs_info->trans_list,
struct btrfs_transaction, list);
if (t->state >= TRANS_STATE_COMMIT_START) {
atomic_inc(&t->use_count);
refcount_inc(&t->use_count);
spin_unlock(&fs_info->trans_lock);
btrfs_wait_for_commit(fs_info, t->transid);
btrfs_put_transaction(t);

View File

@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
*/
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
{
if (atomic_inc_not_zero(&root->refs))
if (refcount_inc_not_zero(&root->refs))
return root;
return NULL;
}
static inline void btrfs_put_fs_root(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->refs))
if (refcount_dec_and_test(&root->refs))
kfree(root);
}

View File

@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
/*
* If not empty, someone is still holding mutex of
* full_stripe_lock, which can only be released by caller.
* And it will definitely cause use-after-free when caller
* tries to release full stripe lock.
*
* No better way to resolve, but only to warn.
*/
WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
kfree(cache->free_space_ctl);
kfree(cache);
}
@ -316,14 +326,14 @@ get_caching_control(struct btrfs_block_group_cache *cache)
}
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
refcount_inc(&ctl->count);
spin_unlock(&cache->lock);
return ctl;
}
static void put_caching_control(struct btrfs_caching_control *ctl)
{
if (atomic_dec_and_test(&ctl->count))
if (refcount_dec_and_test(&ctl->count))
kfree(ctl);
}
@ -599,7 +609,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->key.objectid;
atomic_set(&caching_ctl->count, 1);
refcount_set(&caching_ctl->count, 1);
btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
caching_thread, NULL, NULL);
@ -620,7 +630,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
struct btrfs_caching_control *ctl;
ctl = cache->caching_ctl;
atomic_inc(&ctl->count);
refcount_inc(&ctl->count);
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&cache->lock);
@ -707,7 +717,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
}
down_write(&fs_info->commit_root_sem);
atomic_inc(&caching_ctl->count);
refcount_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->commit_root_sem);
@ -892,7 +902,7 @@ search_again:
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
refcount_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
@ -2980,7 +2990,7 @@ again:
struct btrfs_delayed_ref_node *ref;
ref = &head->node;
atomic_inc(&ref->refs);
refcount_inc(&ref->refs);
spin_unlock(&delayed_refs->lock);
/*
@ -3003,7 +3013,6 @@ again:
goto again;
}
out:
assert_qgroups_uptodate(trans);
trans->can_flush_pending_bgs = can_flush_pending_bgs;
return 0;
}
@ -3057,7 +3066,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
refcount_inc(&head->node.refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
@ -3443,7 +3452,8 @@ again:
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
* b) we're with nospace_cache mount option.
* b) we're with nospace_cache mount option,
* c) we're with v2 space_cache (FREE_SPACE_TREE).
*/
dcs = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
@ -9917,6 +9927,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
return cache;
}
@ -10416,7 +10427,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->caching_block_groups, list)
if (ctl->block_group == block_group) {
caching_ctl = ctl;
atomic_inc(&caching_ctl->count);
refcount_inc(&caching_ctl->count);
break;
}
}
@ -10850,7 +10861,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
spin_lock(&fs_info->trans_lock);
trans = fs_info->running_transaction;
if (trans)
atomic_inc(&trans->use_count);
refcount_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
ret = find_free_dev_extent_start(trans, device, minlen, start,

View File

@ -68,7 +68,7 @@ void btrfs_leak_debug_check(void)
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
state->start, state->end, state->state,
extent_state_in_tree(state),
atomic_read(&state->refs));
refcount_read(&state->refs));
list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
@ -238,7 +238,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
state->failrec = NULL;
RB_CLEAR_NODE(&state->rb_node);
btrfs_leak_debug_add(&state->leak_list, &states);
atomic_set(&state->refs, 1);
refcount_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
trace_alloc_extent_state(state, mask, _RET_IP_);
return state;
@ -248,7 +248,7 @@ void free_extent_state(struct extent_state *state)
{
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
if (refcount_dec_and_test(&state->refs)) {
WARN_ON(extent_state_in_tree(state));
btrfs_leak_debug_del(&state->leak_list);
trace_free_extent_state(state, _RET_IP_);
@ -641,7 +641,7 @@ again:
if (cached && extent_state_in_tree(cached) &&
cached->start <= start && cached->end > start) {
if (clear)
atomic_dec(&cached->refs);
refcount_dec(&cached->refs);
state = cached;
goto hit_next;
}
@ -793,7 +793,7 @@ process_node:
if (state->state & bits) {
start = state->start;
atomic_inc(&state->refs);
refcount_inc(&state->refs);
wait_on_state(tree, state);
free_extent_state(state);
goto again;
@ -834,7 +834,7 @@ static void cache_state_if_flags(struct extent_state *state,
if (cached_ptr && !(*cached_ptr)) {
if (!flags || (state->state & flags)) {
*cached_ptr = state;
atomic_inc(&state->refs);
refcount_inc(&state->refs);
}
}
}
@ -1538,7 +1538,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
if (!found) {
*start = state->start;
*cached_state = state;
atomic_inc(&state->refs);
refcount_inc(&state->refs);
}
found++;
*end = state->end;
@ -2004,16 +2004,11 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
@ -2026,17 +2021,35 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length,
* read repair operation.
*/
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
/*
* Note that we don't use BTRFS_MAP_WRITE because it's supposed
* to update all raid stripes, but here we just want to correct
* bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
* stripe's dev and sector.
*/
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&map_length, &bbio, 0);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
ASSERT(bbio->mirror_num == 1);
} else {
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
BUG_ON(mirror_num != bbio->mirror_num);
}
BUG_ON(mirror_num != bbio->mirror_num);
sector = bbio->stripes[mirror_num-1].physical >> 9;
sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[mirror_num-1].dev;
dev = bbio->stripes[bbio->mirror_num - 1].dev;
btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
btrfs_bio_counter_dec(fs_info);
@ -2859,7 +2872,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
em = *em_cached;
if (extent_map_in_tree(em) && start >= em->start &&
start < extent_map_end(em)) {
atomic_inc(&em->refs);
refcount_inc(&em->refs);
return em;
}
@ -2870,7 +2883,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
if (em_cached && !IS_ERR_OR_NULL(em)) {
BUG_ON(*em_cached);
atomic_inc(&em->refs);
refcount_inc(&em->refs);
*em_cached = em;
}
return em;

View File

@ -2,6 +2,7 @@
#define __EXTENTIO__
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include "ulist.h"
/* bits for the extent state */
@ -14,14 +15,17 @@
#define EXTENT_DEFRAG (1U << 6)
#define EXTENT_BOUNDARY (1U << 9)
#define EXTENT_NODATASUM (1U << 10)
#define EXTENT_DO_ACCOUNTING (1U << 11)
#define EXTENT_CLEAR_META_RESV (1U << 11)
#define EXTENT_FIRST_DELALLOC (1U << 12)
#define EXTENT_NEED_WAIT (1U << 13)
#define EXTENT_DAMAGED (1U << 14)
#define EXTENT_NORESERVE (1U << 15)
#define EXTENT_QGROUP_RESERVED (1U << 16)
#define EXTENT_CLEAR_DATA_RESV (1U << 17)
#define EXTENT_DELALLOC_NEW (1U << 18)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/*
@ -143,7 +147,7 @@ struct extent_state {
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
atomic_t refs;
refcount_t refs;
unsigned state;
struct io_failure_record *failrec;

View File

@ -55,7 +55,7 @@ struct extent_map *alloc_extent_map(void)
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
em->generation = 0;
atomic_set(&em->refs, 1);
refcount_set(&em->refs, 1);
INIT_LIST_HEAD(&em->list);
return em;
}
@ -71,8 +71,8 @@ void free_extent_map(struct extent_map *em)
{
if (!em)
return;
WARN_ON(atomic_read(&em->refs) == 0);
if (atomic_dec_and_test(&em->refs)) {
WARN_ON(refcount_read(&em->refs) == 0);
if (refcount_dec_and_test(&em->refs)) {
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
@ -322,7 +322,7 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em,
int modified)
{
atomic_inc(&em->refs);
refcount_inc(&em->refs);
em->mod_start = em->start;
em->mod_len = em->len;
@ -381,7 +381,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
if (strict && !(end > em->start && start < extent_map_end(em)))
return NULL;
atomic_inc(&em->refs);
refcount_inc(&em->refs);
return em;
}

View File

@ -2,6 +2,7 @@
#define __EXTENTMAP__
#include <linux/rbtree.h>
#include <linux/refcount.h>
#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
@ -41,7 +42,7 @@ struct extent_map {
*/
struct map_lookup *map_lookup;
};
atomic_t refs;
refcount_t refs;
unsigned int compress_type;
struct list_head list;
};

View File

@ -1404,6 +1404,47 @@ fail:
}
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
const u64 start,
const u64 len,
struct extent_state **cached_state)
{
u64 search_start = start;
const u64 end = start + len - 1;
while (search_start < end) {
const u64 search_len = end - search_start + 1;
struct extent_map *em;
u64 em_len;
int ret = 0;
em = btrfs_get_extent(inode, NULL, 0, search_start,
search_len, 0);
if (IS_ERR(em))
return PTR_ERR(em);
if (em->block_start != EXTENT_MAP_HOLE)
goto next;
em_len = em->len;
if (em->start < search_start)
em_len -= search_start - em->start;
if (em_len > search_len)
em_len = search_len;
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
EXTENT_DELALLOC_NEW,
NULL, cached_state, GFP_NOFS);
next:
search_start = extent_map_end(em);
free_extent_map(em);
if (ret)
return ret;
}
return 0;
}
/*
* This function locks the extent and properly waits for data=ordered extents
* to finish before allowing the pages to be modified if need.
@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
+ round_up(pos + write_bytes - start_pos,
fs_info->sectorsize) - 1;
if (start_pos < inode->vfs_inode.i_size) {
if (start_pos < inode->vfs_inode.i_size ||
(inode->flags & BTRFS_INODE_PREALLOC)) {
struct btrfs_ordered_extent *ordered;
unsigned int clear_bits;
lock_extent_bits(&inode->io_tree, start_pos, last_pos,
cached_state);
ordered = btrfs_lookup_ordered_range(inode, start_pos,
@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
}
if (ordered)
btrfs_put_ordered_extent(ordered);
ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
last_pos - start_pos + 1,
cached_state);
clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
if (ret)
clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
clear_extent_bit(&inode->io_tree, start_pos,
last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached_state, GFP_NOFS);
last_pos, clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
0, cached_state, GFP_NOFS);
if (ret)
return ret;
*lockstart = start_pos;
*lockend = last_pos;
ret = 1;
@ -2342,13 +2394,8 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
int ret = 0;
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0);
if (IS_ERR_OR_NULL(em)) {
if (!em)
ret = -ENOMEM;
else
ret = PTR_ERR(em);
return ret;
}
if (IS_ERR(em))
return PTR_ERR(em);
/* Hole or vacuum extent(only exists in no-hole mode) */
if (em->block_start == EXTENT_MAP_HOLE) {
@ -2835,11 +2882,8 @@ static long btrfs_fallocate(struct file *file, int mode,
while (1) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
if (IS_ERR_OR_NULL(em)) {
if (!em)
ret = -ENOMEM;
else
ret = PTR_ERR(em);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
break;
}
last_byte = min(extent_map_end(em), alloc_end);
@ -2856,8 +2900,10 @@ static long btrfs_fallocate(struct file *file, int mode,
}
ret = btrfs_qgroup_reserve_data(inode, cur_offset,
last_byte - cur_offset);
if (ret < 0)
if (ret < 0) {
free_extent_map(em);
break;
}
} else {
/*
* Do not need to reserve unwritten extent for this

View File

@ -355,7 +355,7 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
io_ctl->orig = io_ctl->cur;
io_ctl->size = PAGE_SIZE;
if (clear)
memset(io_ctl->cur, 0, PAGE_SIZE);
clear_page(io_ctl->cur);
}
static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)

View File

@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
u64 ram_bytes, int compress_type,
int type);
static void __endio_write_update_ordered(struct inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate);
/*
* Cleanup all submitted ordered extents in specified range to handle errors
* from the fill_dellaloc() callback.
*
* NOTE: caller must ensure that when an error happens, it can not call
* extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
* and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
* to be released, which we want to happen only when finishing the ordered
* extent (btrfs_finish_ordered_io()). Also note that the caller of the
* fill_delalloc() callback already does proper cleanup for the first page of
* the range, that is, it invokes the callback writepage_end_io_hook() for the
* range of the first page.
*/
static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
const u64 offset,
const u64 bytes)
{
return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
bytes - PAGE_SIZE, false);
}
static int btrfs_dirty_inode(struct inode *inode);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@ -547,7 +572,7 @@ cont:
}
if (ret <= 0) {
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DEFRAG;
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
unsigned long page_error_op;
clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
@ -565,8 +590,10 @@ cont:
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
btrfs_free_reserved_data_space_noquota(inode, start,
end - start + 1);
if (ret == 0)
btrfs_free_reserved_data_space_noquota(inode,
start,
end - start + 1);
goto free_pages_out;
}
}
@ -852,6 +879,7 @@ out_free:
async_extent->start +
async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
@ -918,10 +946,13 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 disk_num_bytes;
u64 cur_alloc_size;
u64 cur_alloc_size = 0;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
unsigned clear_bits;
unsigned long page_ops;
bool extent_reserved = false;
int ret = 0;
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@ -944,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode,
extent_clear_unlock_delalloc(inode, start, end,
delalloc_end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
@ -966,14 +998,14 @@ static noinline int cow_file_range(struct inode *inode,
start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
unsigned long op;
cur_alloc_size = disk_num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
fs_info->sectorsize, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
cur_alloc_size = ins.offset;
extent_reserved = true;
ram_size = ins.offset;
em = create_io_em(inode, start, ins.offset, /* len */
@ -988,7 +1020,6 @@ static noinline int cow_file_range(struct inode *inode,
goto out_reserve;
free_extent_map(em);
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
if (ret)
@ -998,15 +1029,24 @@ static noinline int cow_file_range(struct inode *inode,
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
/*
* Only drop cache here, and process as normal.
*
* We must not allow extent_clear_unlock_delalloc()
* at out_unlock label to free meta of this ordered
* extent, as its meta should be freed by
* btrfs_finish_ordered_io().
*
* So we must continue until @start is increased to
* skip current ordered extent.
*/
if (ret)
goto out_drop_extent_cache;
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + ram_size - 1, 0);
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (disk_num_bytes < cur_alloc_size)
break;
/* we're not doing compressed IO, don't unlock the first
* page (which the caller expects to stay locked), don't
* clear any dirty bits and don't set any writeback bits
@ -1014,18 +1054,30 @@ static noinline int cow_file_range(struct inode *inode,
* Do set the Private2 bit so we know this page was properly
* setup for writepage
*/
op = unlock ? PAGE_UNLOCK : 0;
op |= PAGE_SET_PRIVATE2;
page_ops = unlock ? PAGE_UNLOCK : 0;
page_ops |= PAGE_SET_PRIVATE2;
extent_clear_unlock_delalloc(inode, start,
start + ram_size - 1,
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
op);
disk_num_bytes -= cur_alloc_size;
page_ops);
if (disk_num_bytes < cur_alloc_size)
disk_num_bytes = 0;
else
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
/*
* btrfs_reloc_clone_csums() error, since start is increased
* extent_clear_unlock_delalloc() at out_unlock label won't
* free metadata of current ordered extent, we're OK to exit.
*/
if (ret)
goto out_unlock;
}
out:
return ret;
@ -1036,12 +1088,35 @@ out_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK;
/*
* If we reserved an extent for our delalloc range (or a subrange) and
* failed to create the respective ordered extent, then it means that
* when we reserved the extent we decremented the extent's size from
* the data space_info's bytes_may_use counter and incremented the
* space_info's bytes_reserved counter by the same amount. We must make
* sure extent_clear_unlock_delalloc() does not try to decrement again
* the data space_info's bytes_may_use counter, therefore we do not pass
* it the flag EXTENT_CLEAR_DATA_RESV.
*/
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
start + cur_alloc_size,
start + cur_alloc_size,
locked_page,
clear_bits,
page_ops);
start += cur_alloc_size;
if (start >= end)
goto out;
}
extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
locked_page,
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DELALLOC | EXTENT_DEFRAG,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
clear_bits | EXTENT_CLEAR_DATA_RESV,
page_ops);
goto out;
}
@ -1414,15 +1489,14 @@ out_check:
BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
BTRFS_DATA_RELOC_TREE_OBJECTID)
/*
* Error handled later, as we must prevent
* extent_clear_unlock_delalloc() in error handler
* from freeing metadata of created ordered extent.
*/
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
if (ret) {
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
goto error;
}
}
extent_clear_unlock_delalloc(inode, cur_offset,
cur_offset + num_bytes - 1, end,
@ -1434,6 +1508,14 @@ out_check:
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
cur_offset = extent_end;
/*
* btrfs_reloc_clone_csums() error, now we're OK to call error
* handler, as metadata for created ordered extent will only
* be freed by btrfs_finish_ordered_io().
*/
if (ret)
goto error;
if (cur_offset > end)
break;
}
@ -1509,6 +1591,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
ret = cow_file_range_async(inode, locked_page, start, end,
page_started, nr_written);
}
if (ret)
btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
return ret;
}
@ -1693,6 +1777,14 @@ static void btrfs_set_bit_hook(struct inode *inode,
btrfs_add_delalloc_inodes(root, inode);
spin_unlock(&BTRFS_I(inode)->lock);
}
if (!(state->state & EXTENT_DELALLOC_NEW) &&
(*bits & EXTENT_DELALLOC_NEW)) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
state->start;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
/*
@ -1722,7 +1814,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
spin_lock(&inode->lock);
inode->outstanding_extents -= num_extents;
spin_unlock(&inode->lock);
@ -1733,7 +1825,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
* don't need to call dellalloc_release_metadata if there is an
* error.
*/
if (*bits & EXTENT_DO_ACCOUNTING &&
if (*bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
@ -1741,10 +1833,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
if (btrfs_is_testing(fs_info))
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list && !(state->state & EXTENT_NORESERVE)
&& (*bits & (EXTENT_DO_ACCOUNTING |
EXTENT_CLEAR_DATA_RESV)))
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
do_list && !(state->state & EXTENT_NORESERVE) &&
(*bits & EXTENT_CLEAR_DATA_RESV))
btrfs_free_reserved_data_space_noquota(
&inode->vfs_inode,
state->start, len);
@ -1759,6 +1850,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
btrfs_del_delalloc_inode(root, inode);
spin_unlock(&inode->lock);
}
if ((state->state & EXTENT_DELALLOC_NEW) &&
(*bits & EXTENT_DELALLOC_NEW)) {
spin_lock(&inode->lock);
ASSERT(inode->new_delalloc_bytes >= len);
inode->new_delalloc_bytes -= len;
spin_unlock(&inode->lock);
}
}
/*
@ -2791,6 +2890,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
u64 logical_len = ordered_extent->len;
bool nolock;
bool truncated = false;
bool range_locked = false;
bool clear_new_delalloc_bytes = false;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
clear_new_delalloc_bytes = true;
nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
@ -2839,6 +2945,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
range_locked = true;
lock_extent_bits(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
&cached_state);
@ -2864,7 +2971,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out_unlock;
goto out;
}
trans->block_rsv = &fs_info->delalloc_block_rsv;
@ -2896,7 +3003,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out_unlock;
goto out;
}
add_pending_csums(trans, inode, &ordered_extent->list);
@ -2905,14 +3012,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
goto out_unlock;
goto out;
}
ret = 0;
out_unlock:
unlock_extent_cached(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
if (range_locked || clear_new_delalloc_bytes) {
unsigned int clear_bits = 0;
if (range_locked)
clear_bits |= EXTENT_LOCKED;
if (clear_new_delalloc_bytes)
clear_bits |= EXTENT_DELALLOC_NEW;
clear_extent_bit(&BTRFS_I(inode)->io_tree,
ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1,
clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
0, &cached_state, GFP_NOFS);
}
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
ordered_extent->len);
@ -4401,9 +4520,17 @@ search_again:
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
item_end +=
btrfs_file_extent_num_bytes(leaf, fi);
trace_btrfs_truncate_show_fi_regular(
BTRFS_I(inode), leaf, fi,
found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_inline_len(leaf,
path->slots[0], fi);
trace_btrfs_truncate_show_fi_inline(
BTRFS_I(inode), leaf, fi, path->slots[0],
found_key.offset);
}
item_end--;
}
@ -4603,13 +4730,6 @@ error:
btrfs_free_path(path);
if (err == 0) {
/* only inline file may have last_size != new_size */
if (new_size >= fs_info->sectorsize ||
new_size > fs_info->max_inline)
ASSERT(last_size == new_size);
}
if (be_nice && bytes_deleted > SZ_32M) {
unsigned long updates = trans->delayed_ref_updates;
if (updates) {
@ -6735,7 +6855,6 @@ static noinline int uncompress_inline(struct btrfs_path *path,
*
* This also copies inline extents directly into the page.
*/
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page,
size_t pg_offset, u64 start, u64 len,
@ -6835,11 +6954,18 @@ again:
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
extent_start);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
path->slots[0],
extent_start);
}
next:
if (start >= extent_end) {
@ -7037,19 +7163,17 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
if (IS_ERR(em))
return em;
if (em) {
/*
* if our em maps to
* - a hole or
* - a pre-alloc extent,
* there might actually be delalloc bytes behind it.
*/
if (em->block_start != EXTENT_MAP_HOLE &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em;
else
hole_em = em;
}
/*
* If our em maps to:
* - a hole or
* - a pre-alloc extent,
* there might actually be delalloc bytes behind it.
*/
if (em->block_start != EXTENT_MAP_HOLE &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em;
else
hole_em = em;
/* check to see if we've wrapped (len == -1 or similar) */
end = start + len;
@ -8127,17 +8251,26 @@ static void btrfs_endio_direct_read(struct bio *bio)
bio_put(bio);
}
static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
const u64 offset,
const u64 bytes,
const int uptodate)
static void __endio_write_update_ordered(struct inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_extent *ordered = NULL;
struct btrfs_workqueue *wq;
btrfs_work_func_t func;
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
int ret;
if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
wq = fs_info->endio_freespace_worker;
func = btrfs_freespace_write_helper;
} else {
wq = fs_info->endio_write_workers;
func = btrfs_endio_write_helper;
}
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
@ -8146,9 +8279,8 @@ again:
if (!ret)
goto out_test;
btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
finish_ordered_fn, NULL, NULL);
btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &ordered->work);
out_test:
/*
* our bio might span multiple ordered extents. If we haven't
@ -8166,10 +8298,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
struct btrfs_dio_private *dip = bio->bi_private;
struct bio *dio_bio = dip->dio_bio;
btrfs_endio_direct_write_update_ordered(dip->inode,
dip->logical_offset,
dip->bytes,
!bio->bi_error);
__endio_write_update_ordered(dip->inode, dip->logical_offset,
dip->bytes, !bio->bi_error);
kfree(dip);
@ -8530,10 +8660,10 @@ free_ordered:
io_bio = NULL;
} else {
if (write)
btrfs_endio_direct_write_update_ordered(inode,
__endio_write_update_ordered(inode,
file_offset,
dio_bio->bi_iter.bi_size,
0);
false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
@ -8668,11 +8798,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
*/
if (dio_data.unsubmitted_oe_range_start <
dio_data.unsubmitted_oe_range_end)
btrfs_endio_direct_write_update_ordered(inode,
__endio_write_update_ordered(inode,
dio_data.unsubmitted_oe_range_start,
dio_data.unsubmitted_oe_range_end -
dio_data.unsubmitted_oe_range_start,
0);
false);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, offset,
count - (size_t)ret);
@ -8819,6 +8949,7 @@ again:
if (!inode_evicting)
clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state,
GFP_NOFS);
@ -8876,8 +9007,8 @@ again:
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 1,
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
&cached_state, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS);
@ -9248,6 +9379,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->delalloc_bytes = 0;
ei->new_delalloc_bytes = 0;
ei->defrag_bytes = 0;
ei->disk_i_size = 0;
ei->flags = 0;
@ -9313,6 +9445,7 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(BTRFS_I(inode)->outstanding_extents);
WARN_ON(BTRFS_I(inode)->reserved_extents);
WARN_ON(BTRFS_I(inode)->delalloc_bytes);
WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
WARN_ON(BTRFS_I(inode)->csum_bytes);
WARN_ON(BTRFS_I(inode)->defrag_bytes);
@ -9436,7 +9569,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
ALIGN(delalloc_bytes, blocksize)) >> 9;

View File

@ -1504,7 +1504,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (ret)
return ret;
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
mnt_drop_write_file(file);
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
@ -1619,7 +1619,7 @@ out_free:
kfree(vol_args);
out:
mutex_unlock(&fs_info->volume_mutex);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
mnt_drop_write_file(file);
return ret;
}
@ -2661,7 +2661,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1))
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
mutex_lock(&fs_info->volume_mutex);
@ -2680,7 +2680,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
kfree(vol_args);
out:
mutex_unlock(&fs_info->volume_mutex);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
return ret;
}
@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
return -EOPNOTSUPP;
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
@ -2721,7 +2721,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
mutex_unlock(&fs_info->volume_mutex);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
if (!ret) {
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
@ -2752,7 +2752,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
if (ret)
return ret;
if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_drop_write;
}
@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
kfree(vol_args);
out:
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out_drop_write:
mnt_drop_write_file(file);
@ -4439,13 +4439,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
ret = -EROFS;
goto out;
}
if (atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1)) {
if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
ret = btrfs_dev_replace_by_ioctl(fs_info, p);
atomic_set(
&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
}
break;
case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
@ -4640,7 +4638,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
return ret;
again:
if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
@ -4686,7 +4684,7 @@ again:
}
locked:
BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
@ -4742,11 +4740,10 @@ locked:
do_balance:
/*
* Ownership of bctl and mutually_exclusive_operation_running
* Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
* goes to to btrfs_balance. bctl is freed in __cancel_balance,
* or, if restriper was paused all the way until unmount, in
* free_fs_info. mutually_exclusive_operation_running is
* cleared in __cancel_balance.
* free_fs_info. The flag is cleared in __cancel_balance.
*/
need_unlock = false;
@ -4766,7 +4763,7 @@ out_unlock:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
if (need_unlock)
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out:
mnt_drop_write_file(file);
return ret;

View File

@ -212,7 +212,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->root_extent_list);
@ -358,7 +358,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
out:
if (!ret && cached && entry) {
*cached = entry;
atomic_inc(&entry->refs);
refcount_inc(&entry->refs);
}
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
@ -425,7 +425,7 @@ have_entry:
out:
if (!ret && cached && entry) {
*cached = entry;
atomic_inc(&entry->refs);
refcount_inc(&entry->refs);
}
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
@ -456,7 +456,7 @@ void btrfs_get_logged_extents(struct btrfs_inode *inode,
if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
continue;
list_add(&ordered->log_list, logged_list);
atomic_inc(&ordered->refs);
refcount_inc(&ordered->refs);
}
spin_unlock_irq(&tree->lock);
}
@ -565,7 +565,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (atomic_dec_and_test(&entry->refs)) {
if (refcount_dec_and_test(&entry->refs)) {
ASSERT(list_empty(&entry->log_list));
ASSERT(list_empty(&entry->trans_list));
ASSERT(list_empty(&entry->root_extent_list));
@ -623,7 +623,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
spin_lock(&fs_info->trans_lock);
trans = fs_info->running_transaction;
if (trans)
atomic_inc(&trans->use_count);
refcount_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
ASSERT(trans);
@ -690,7 +690,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
list_move_tail(&ordered->root_extent_list,
&root->ordered_extents);
atomic_inc(&ordered->refs);
refcount_inc(&ordered->refs);
spin_unlock(&root->ordered_extent_lock);
btrfs_init_work(&ordered->flush_work,
@ -870,7 +870,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
if (!offset_in_entry(entry, file_offset))
entry = NULL;
if (entry)
atomic_inc(&entry->refs);
refcount_inc(&entry->refs);
out:
spin_unlock_irq(&tree->lock);
return entry;
@ -911,7 +911,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
}
out:
if (entry)
atomic_inc(&entry->refs);
refcount_inc(&entry->refs);
spin_unlock_irq(&tree->lock);
return entry;
}
@ -948,7 +948,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
goto out;
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
atomic_inc(&entry->refs);
refcount_inc(&entry->refs);
out:
spin_unlock_irq(&tree->lock);
return entry;

View File

@ -113,7 +113,7 @@ struct btrfs_ordered_extent {
int compress_type;
/* reference count */
atomic_t refs;
refcount_t refs;
/* the inode we belong to */
struct inode *inode;

View File

@ -47,50 +47,6 @@
* - check all ioctl parameters
*/
/*
* one struct for each qgroup, organized in fs_info->qgroup_tree.
*/
struct btrfs_qgroup {
u64 qgroupid;
/*
* state
*/
u64 rfer; /* referenced */
u64 rfer_cmpr; /* referenced compressed */
u64 excl; /* exclusive */
u64 excl_cmpr; /* exclusive compressed */
/*
* limits
*/
u64 lim_flags; /* which limits are set */
u64 max_rfer;
u64 max_excl;
u64 rsv_rfer;
u64 rsv_excl;
/*
* reservation tracking
*/
u64 reserved;
/*
* lists
*/
struct list_head groups; /* groups this group is member of */
struct list_head members; /* groups that are members of this group */
struct list_head dirty; /* dirty groups */
struct rb_node node; /* tree of qgroups */
/*
* temp variables for accounting operations
* Refer to qgroup_shared_accounting() for details.
*/
u64 old_refcnt;
u64 new_refcnt;
};
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
int mod)
{
@ -1078,6 +1034,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
if (sign > 0) {
trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
if (qgroup->reserved < num_bytes)
report_reserved_underflow(fs_info, qgroup, num_bytes);
else
@ -1103,6 +1060,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
if (sign > 0) {
trace_qgroup_update_reserve(fs_info, qgroup,
-(s64)num_bytes);
if (qgroup->reserved < num_bytes)
report_reserved_underflow(fs_info, qgroup,
num_bytes);
@ -2058,12 +2017,12 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
if (!ret) {
/*
* Use (u64)-1 as time_seq to do special search, which
* Use SEQ_LAST as time_seq to do special search, which
* doesn't lock tree or delayed_refs and search current
* root. It's safe inside commit_transaction().
*/
ret = btrfs_find_all_roots(trans, fs_info,
record->bytenr, (u64)-1, &new_roots);
record->bytenr, SEQ_LAST, &new_roots);
if (ret < 0)
goto cleanup;
if (qgroup_to_skip)
@ -2370,6 +2329,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
struct btrfs_fs_info *fs_info = root->fs_info;
u64 ref_root = root->root_key.objectid;
int ret = 0;
int retried = 0;
struct ulist_node *unode;
struct ulist_iterator uiter;
@ -2378,7 +2338,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
if (num_bytes == 0)
return 0;
retry:
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
if (!quota_root)
@ -2405,6 +2365,27 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
qg = unode_aux_to_qgroup(unode);
if (enforce && !qgroup_check_limits(qg, num_bytes)) {
/*
* Commit the tree and retry, since we may have
* deletions which would free up space.
*/
if (!retried && qg->reserved > 0) {
struct btrfs_trans_handle *trans;
spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans);
if (ret)
return ret;
retried++;
goto retry;
}
ret = -EDQUOT;
goto out;
}
@ -2427,6 +2408,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
qg = unode_aux_to_qgroup(unode);
trace_qgroup_update_reserve(fs_info, qg, num_bytes);
qg->reserved += num_bytes;
}
@ -2472,6 +2454,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = unode_aux_to_qgroup(unode);
trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
if (qg->reserved < num_bytes)
report_reserved_underflow(fs_info, qg, num_bytes);
else
@ -2490,18 +2473,6 @@ out:
spin_unlock(&fs_info->qgroup_lock);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
btrfs_err(trans->fs_info,
"qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
(u32)(trans->delayed_ref_elem.seq >> 32),
(u32)trans->delayed_ref_elem.seq);
BUG();
}
/*
* returns < 0 on error, 0 when more leafs are to be scanned.
* returns 1 when done.
@ -2889,14 +2860,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
if (ret < 0)
goto out;
if (free) {
if (free)
trace_op = QGROUP_FREE;
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
if (free)
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
changeset.bytes_changed);
trace_op = QGROUP_FREE;
}
trace_btrfs_qgroup_release_data(inode, start, len,
changeset.bytes_changed, trace_op);
out:
ulist_release(&changeset.range_changed);
return ret;
@ -2948,6 +2919,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
trace_qgroup_meta_reserve(root, (s64)num_bytes);
ret = qgroup_reserve(root, num_bytes, enforce);
if (ret < 0)
return ret;
@ -2967,6 +2939,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
if (reserved == 0)
return;
trace_qgroup_meta_reserve(root, -(s64)reserved);
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}
@ -2981,6 +2954,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
trace_qgroup_meta_reserve(root, -(s64)num_bytes);
btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}

View File

@ -61,6 +61,50 @@ struct btrfs_qgroup_extent_record {
struct ulist *old_roots;
};
/*
* one struct for each qgroup, organized in fs_info->qgroup_tree.
*/
struct btrfs_qgroup {
u64 qgroupid;
/*
* state
*/
u64 rfer; /* referenced */
u64 rfer_cmpr; /* referenced compressed */
u64 excl; /* exclusive */
u64 excl_cmpr; /* exclusive compressed */
/*
* limits
*/
u64 lim_flags; /* which limits are set */
u64 max_rfer;
u64 max_excl;
u64 rsv_rfer;
u64 rsv_excl;
/*
* reservation tracking
*/
u64 reserved;
/*
* lists
*/
struct list_head groups; /* groups this group is member of */
struct list_head members; /* groups that are members of this group */
struct list_head dirty; /* dirty groups */
struct rb_node node; /* tree of qgroups */
/*
* temp variables for accounting operations
* Refer to qgroup_shared_accounting() for details.
*/
u64 old_refcnt;
u64 new_refcnt;
};
/*
* For qgroup event trace points only
*/
@ -186,17 +230,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes);
/*
* TODO: Add proper trace point for it, as btrfs_qgroup_free() is
* called by everywhere, can't provide good trace for delayed ref case.
*/
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,

View File

@ -149,7 +149,7 @@ struct btrfs_raid_bio {
int generic_bio_cnt;
atomic_t refs;
refcount_t refs;
atomic_t stripes_pending;
@ -389,7 +389,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
if (bio_list_empty(&rbio->bio_list)) {
if (!list_empty(&rbio->hash_list)) {
list_del_init(&rbio->hash_list);
atomic_dec(&rbio->refs);
refcount_dec(&rbio->refs);
BUG_ON(!list_empty(&rbio->plug_list));
}
}
@ -480,7 +480,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
/* bump our ref if we were not in the list before */
if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
atomic_inc(&rbio->refs);
refcount_inc(&rbio->refs);
if (!list_empty(&rbio->stripe_cache)){
list_move(&rbio->stripe_cache, &table->stripe_cache);
@ -689,7 +689,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
test_bit(RBIO_CACHE_BIT, &cur->flags) &&
!test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
list_del_init(&cur->hash_list);
atomic_dec(&cur->refs);
refcount_dec(&cur->refs);
steal_rbio(cur, rbio);
cache_drop = cur;
@ -738,7 +738,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
}
}
lockit:
atomic_inc(&rbio->refs);
refcount_inc(&rbio->refs);
list_add(&rbio->hash_list, &h->hash_list);
out:
spin_unlock_irqrestore(&h->lock, flags);
@ -784,7 +784,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
}
list_del_init(&rbio->hash_list);
atomic_dec(&rbio->refs);
refcount_dec(&rbio->refs);
/*
* we use the plug list to hold all the rbios
@ -801,7 +801,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
list_del_init(&rbio->plug_list);
list_add(&next->hash_list, &h->hash_list);
atomic_inc(&next->refs);
refcount_inc(&next->refs);
spin_unlock(&rbio->bio_list_lock);
spin_unlock_irqrestore(&h->lock, flags);
@ -843,8 +843,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
int i;
WARN_ON(atomic_read(&rbio->refs) < 0);
if (!atomic_dec_and_test(&rbio->refs))
if (!refcount_dec_and_test(&rbio->refs))
return;
WARN_ON(!list_empty(&rbio->stripe_cache));
@ -997,7 +996,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
rbio->stripe_npages = stripe_npages;
rbio->faila = -1;
rbio->failb = -1;
atomic_set(&rbio->refs, 1);
refcount_set(&rbio->refs, 1);
atomic_set(&rbio->error, 0);
atomic_set(&rbio->stripes_pending, 0);
@ -2118,6 +2117,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_raid_bio *rbio;
int ret;
if (generic_io) {
ASSERT(bbio->mirror_num == mirror_num);
btrfs_io_bio(bio)->mirror_num = mirror_num;
}
rbio = alloc_rbio(fs_info, bbio, stripe_len);
if (IS_ERR(rbio)) {
if (generic_io)
@ -2194,6 +2198,8 @@ static void read_rebuild_work(struct btrfs_work *work)
/*
* The following code is used to scrub/replace the parity stripe
*
* Caller must have already increased bio_counter for getting @bbio.
*
* Note: We need make sure all the pages that add into the scrub/replace
* raid bio are correct and not be changed during the scrub/replace. That
* is those pages just hold metadata or file data with checksum.
@ -2231,6 +2237,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
ASSERT(rbio->stripe_npages == stripe_nsectors);
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
/*
* We have already increased bio_counter when getting bbio, record it
* so we can free it at rbio_orig_end_io().
*/
rbio->generic_bio_cnt = 1;
return rbio;
}
@ -2673,6 +2685,12 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
return NULL;
}
/*
* When we get bbio, we have already increased bio_counter, record it
* so we can free it at rbio_orig_end_io()
*/
rbio->generic_bio_cnt = 1;
return rbio;
}

View File

@ -209,9 +209,9 @@ cleanup:
return;
}
int btree_readahead_hook(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int err)
int btree_readahead_hook(struct extent_buffer *eb, int err)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int ret = 0;
struct reada_extent *re;
@ -235,10 +235,10 @@ start_machine:
return ret;
}
static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev, u64 logical,
static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
struct btrfs_bio *bbio)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
@ -270,6 +270,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
if (!zone)
return NULL;
ret = radix_tree_preload(GFP_KERNEL);
if (ret) {
kfree(zone);
return NULL;
}
zone->start = start;
zone->end = end;
INIT_LIST_HEAD(&zone->list);
@ -299,6 +305,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
zone = NULL;
}
spin_unlock(&fs_info->reada_lock);
radix_tree_preload_end();
return zone;
}
@ -313,7 +320,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int real_stripes;
int nzones = 0;
@ -334,7 +340,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!re)
return NULL;
blocksize = fs_info->nodesize;
re->logical = logical;
re->top = *top;
INIT_LIST_HEAD(&re->extctl);
@ -344,10 +349,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
/*
* map block
*/
length = blocksize;
length = fs_info->nodesize;
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bbio, 0);
if (ret || !bbio || length < blocksize)
if (ret || !bbio || length < fs_info->nodesize)
goto error;
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
@ -367,7 +372,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!dev->bdev)
continue;
zone = reada_find_zone(fs_info, dev, logical, bbio);
zone = reada_find_zone(dev, logical, bbio);
if (!zone)
continue;
@ -386,6 +391,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
goto error;
}
ret = radix_tree_preload(GFP_KERNEL);
if (ret)
goto error;
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
spin_lock(&fs_info->reada_lock);
@ -395,13 +404,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
radix_tree_preload_end();
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
radix_tree_preload_end();
goto error;
}
radix_tree_preload_end();
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
@ -639,9 +651,9 @@ static int reada_pick_zone(struct btrfs_device *dev)
return 1;
}
static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
struct btrfs_device *dev)
static int reada_start_machine_dev(struct btrfs_device *dev)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
struct reada_extent *re = NULL;
int mirror_num = 0;
struct extent_buffer *eb = NULL;
@ -754,8 +766,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (atomic_read(&device->reada_in_flight) <
MAX_IN_FLIGHT)
enqueued += reada_start_machine_dev(fs_info,
device);
enqueued += reada_start_machine_dev(device);
}
mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued;

View File

@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root_item *item = &root->root_item;
struct timespec ct = current_fs_time(root->fs_info->sb);
struct timespec ct;
ktime_get_real_ts(&ct);
spin_lock(&root->root_item_lock);
btrfs_set_root_ctransid(item, trans->transid);
btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);

View File

@ -64,7 +64,7 @@ struct scrub_ctx;
#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
struct scrub_recover {
atomic_t refs;
refcount_t refs;
struct btrfs_bio *bbio;
u64 map_length;
};
@ -112,7 +112,7 @@ struct scrub_block {
struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
int page_count;
atomic_t outstanding_pages;
atomic_t refs; /* free mem on transition to zero */
refcount_t refs; /* free mem on transition to zero */
struct scrub_ctx *sctx;
struct scrub_parity *sparity;
struct {
@ -140,9 +140,9 @@ struct scrub_parity {
int nsectors;
int stripe_len;
u64 stripe_len;
atomic_t refs;
refcount_t refs;
struct list_head spages;
@ -202,7 +202,7 @@ struct scrub_ctx {
* doesn't free the scrub context before or while the workers are
* doing the wakeup() call.
*/
atomic_t refs;
refcount_t refs;
};
struct scrub_fixup_nodatasum {
@ -240,6 +240,13 @@ struct scrub_warning {
struct btrfs_device *dev;
};
struct full_stripe_lock {
struct rb_node node;
u64 logical;
u64 refs;
struct mutex mutex;
};
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
@ -305,7 +312,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
atomic_inc(&sctx->refs);
refcount_inc(&sctx->refs);
atomic_inc(&sctx->bios_in_flight);
}
@ -348,6 +355,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
scrub_pause_off(fs_info);
}
/*
* Insert new full stripe lock into full stripe locks tree
*
* Return pointer to existing or newly inserted full_stripe_lock structure if
* everything works well.
* Return ERR_PTR(-ENOMEM) if we failed to allocate memory
*
* NOTE: caller must hold full_stripe_locks_root->lock before calling this
* function
*/
static struct full_stripe_lock *insert_full_stripe_lock(
struct btrfs_full_stripe_locks_tree *locks_root,
u64 fstripe_logical)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct full_stripe_lock *entry;
struct full_stripe_lock *ret;
WARN_ON(!mutex_is_locked(&locks_root->lock));
p = &locks_root->root.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct full_stripe_lock, node);
if (fstripe_logical < entry->logical) {
p = &(*p)->rb_left;
} else if (fstripe_logical > entry->logical) {
p = &(*p)->rb_right;
} else {
entry->refs++;
return entry;
}
}
/* Insert new lock */
ret = kmalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
ret->logical = fstripe_logical;
ret->refs = 1;
mutex_init(&ret->mutex);
rb_link_node(&ret->node, parent, p);
rb_insert_color(&ret->node, &locks_root->root);
return ret;
}
/*
* Search for a full stripe lock of a block group
*
* Return pointer to existing full stripe lock if found
* Return NULL if not found
*/
static struct full_stripe_lock *search_full_stripe_lock(
struct btrfs_full_stripe_locks_tree *locks_root,
u64 fstripe_logical)
{
struct rb_node *node;
struct full_stripe_lock *entry;
WARN_ON(!mutex_is_locked(&locks_root->lock));
node = locks_root->root.rb_node;
while (node) {
entry = rb_entry(node, struct full_stripe_lock, node);
if (fstripe_logical < entry->logical)
node = node->rb_left;
else if (fstripe_logical > entry->logical)
node = node->rb_right;
else
return entry;
}
return NULL;
}
/*
* Helper to get full stripe logical from a normal bytenr.
*
* Caller must ensure @cache is a RAID56 block group.
*/
static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
u64 bytenr)
{
u64 ret;
/*
* Due to chunk item size limit, full stripe length should not be
* larger than U32_MAX. Just a sanity check here.
*/
WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);
/*
* round_down() can only handle power of 2, while RAID56 full
* stripe length can be 64KiB * n, so we need to manually round down.
*/
ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
cache->full_stripe_len + cache->key.objectid;
return ret;
}
/*
* Lock a full stripe to avoid concurrency of recovery and read
*
* It's only used for profiles with parities (RAID5/6), for other profiles it
* does nothing.
*
* Return 0 if we locked full stripe covering @bytenr, with a mutex held.
* So caller must call unlock_full_stripe() at the same context.
*
* Return <0 if encounters error.
*/
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
bool *locked_ret)
{
struct btrfs_block_group_cache *bg_cache;
struct btrfs_full_stripe_locks_tree *locks_root;
struct full_stripe_lock *existing;
u64 fstripe_start;
int ret = 0;
*locked_ret = false;
bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
if (!bg_cache) {
ASSERT(0);
return -ENOENT;
}
/* Profiles not based on parity don't need full stripe lock */
if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
goto out;
locks_root = &bg_cache->full_stripe_locks_root;
fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
/* Now insert the full stripe lock */
mutex_lock(&locks_root->lock);
existing = insert_full_stripe_lock(locks_root, fstripe_start);
mutex_unlock(&locks_root->lock);
if (IS_ERR(existing)) {
ret = PTR_ERR(existing);
goto out;
}
mutex_lock(&existing->mutex);
*locked_ret = true;
out:
btrfs_put_block_group(bg_cache);
return ret;
}
/*
* Unlock a full stripe.
*
* NOTE: Caller must ensure it's the same context calling corresponding
* lock_full_stripe().
*
* Return 0 if we unlock full stripe without problem.
* Return <0 for error
*/
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
bool locked)
{
struct btrfs_block_group_cache *bg_cache;
struct btrfs_full_stripe_locks_tree *locks_root;
struct full_stripe_lock *fstripe_lock;
u64 fstripe_start;
bool freeit = false;
int ret = 0;
/* If we didn't acquire full stripe lock, no need to continue */
if (!locked)
return 0;
bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
if (!bg_cache) {
ASSERT(0);
return -ENOENT;
}
if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
goto out;
locks_root = &bg_cache->full_stripe_locks_root;
fstripe_start = get_full_stripe_logical(bg_cache, bytenr);
mutex_lock(&locks_root->lock);
fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
/* Unpaired unlock_full_stripe() detected */
if (!fstripe_lock) {
WARN_ON(1);
ret = -ENOENT;
mutex_unlock(&locks_root->lock);
goto out;
}
if (fstripe_lock->refs == 0) {
WARN_ON(1);
btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
fstripe_lock->logical);
} else {
fstripe_lock->refs--;
}
if (fstripe_lock->refs == 0) {
rb_erase(&fstripe_lock->node, &locks_root->root);
freeit = true;
}
mutex_unlock(&locks_root->lock);
mutex_unlock(&fstripe_lock->mutex);
if (freeit)
kfree(fstripe_lock);
out:
btrfs_put_block_group(bg_cache);
return ret;
}
/*
* used for workers that require transaction commits (i.e., for the
* NOCOW case)
@ -356,7 +579,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
atomic_inc(&sctx->refs);
refcount_inc(&sctx->refs);
/*
* increment scrubs_running to prevent cancel requests from
* completing as long as a worker is running. we must also
@ -447,7 +670,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
static void scrub_put_ctx(struct scrub_ctx *sctx)
{
if (atomic_dec_and_test(&sctx->refs))
if (refcount_dec_and_test(&sctx->refs))
scrub_free_ctx(sctx);
}
@ -462,7 +685,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
sctx->curr = -1;
@ -857,12 +1080,14 @@ out:
static inline void scrub_get_recover(struct scrub_recover *recover)
{
atomic_inc(&recover->refs);
refcount_inc(&recover->refs);
}
static inline void scrub_put_recover(struct scrub_recover *recover)
static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
struct scrub_recover *recover)
{
if (atomic_dec_and_test(&recover->refs)) {
if (refcount_dec_and_test(&recover->refs)) {
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(recover->bbio);
kfree(recover);
}
@ -892,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
int mirror_index;
int page_num;
int success;
bool full_stripe_locked;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@ -917,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
have_csum = sblock_to_check->pagev[0]->have_csum;
dev = sblock_to_check->pagev[0]->dev;
/*
* For RAID5/6, race can happen for a different device scrub thread.
* For data corruption, Parity and Data threads will both try
* to recovery the data.
* Race can lead to doubly added csum error, or even unrecoverable
* error.
*/
ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
if (ret < 0) {
spin_lock(&sctx->stat_lock);
if (ret == -ENOMEM)
sctx->stat.malloc_errors++;
sctx->stat.read_errors++;
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
return ret;
}
if (sctx->is_dev_replace && !is_metadata && !have_csum) {
sblocks_for_recheck = NULL;
goto nodatasum_case;
@ -1241,7 +1485,7 @@ out:
sblock->pagev[page_index]->sblock = NULL;
recover = sblock->pagev[page_index]->recover;
if (recover) {
scrub_put_recover(recover);
scrub_put_recover(fs_info, recover);
sblock->pagev[page_index]->recover =
NULL;
}
@ -1251,6 +1495,9 @@ out:
kfree(sblocks_for_recheck);
}
ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
if (ret < 0)
return ret;
return 0;
}
@ -1330,20 +1577,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
* with a length of PAGE_SIZE, each returned stripe
* represents one mirror
*/
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
logical, &mapped_length, &bbio, 0, 1);
logical, &mapped_length, &bbio);
if (ret || !bbio || mapped_length < sublen) {
btrfs_put_bbio(bbio);
btrfs_bio_counter_dec(fs_info);
return -EIO;
}
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
if (!recover) {
btrfs_put_bbio(bbio);
btrfs_bio_counter_dec(fs_info);
return -ENOMEM;
}
atomic_set(&recover->refs, 1);
refcount_set(&recover->refs, 1);
recover->bbio = bbio;
recover->map_length = mapped_length;
@ -1365,7 +1615,7 @@ leave_nomem:
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
scrub_put_recover(recover);
scrub_put_recover(fs_info, recover);
return -ENOMEM;
}
scrub_page_get(page);
@ -1407,7 +1657,7 @@ leave_nomem:
scrub_get_recover(recover);
page->recover = recover;
}
scrub_put_recover(recover);
scrub_put_recover(fs_info, recover);
length -= sublen;
logical += sublen;
page_index++;
@ -1497,14 +1747,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
bio_add_page(bio, page->page, PAGE_SIZE, 0);
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
}
} else {
bio->bi_iter.bi_sector = page->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
if (btrfsic_submit_bio_wait(bio))
if (btrfsic_submit_bio_wait(bio)) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
}
}
bio_put(bio);
@ -1634,7 +1888,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
if (spage->io_error) {
void *mapped_buffer = kmap_atomic(spage->page);
memset(mapped_buffer, 0, PAGE_SIZE);
clear_page(mapped_buffer);
flush_dcache_page(spage->page);
kunmap_atomic(mapped_buffer);
}
@ -1998,12 +2252,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
static void scrub_block_get(struct scrub_block *sblock)
{
atomic_inc(&sblock->refs);
refcount_inc(&sblock->refs);
}
static void scrub_block_put(struct scrub_block *sblock)
{
if (atomic_dec_and_test(&sblock->refs)) {
if (refcount_dec_and_test(&sblock->refs)) {
int i;
if (sblock->sparity)
@ -2187,8 +2441,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
int ret;
int i;
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bbio, 0, 1);
&length, &bbio);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
@ -2231,6 +2486,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
rbio_out:
bio_put(bio);
bbio_out:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(bbio);
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@ -2255,7 +2511,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
/* one ref inside this function, plus one for each page added to
* a bio later on */
atomic_set(&sblock->refs, 1);
refcount_set(&sblock->refs, 1);
sblock->sctx = sctx;
sblock->no_io_error_seen = 1;
@ -2385,7 +2641,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
unsigned long *bitmap,
u64 start, u64 len)
{
u32 offset;
u64 offset;
int nsectors;
int sectorsize = sparity->sctx->fs_info->sectorsize;
@ -2395,8 +2651,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
}
start -= sparity->logic_start;
start = div_u64_rem(start, sparity->stripe_len, &offset);
offset /= sectorsize;
start = div64_u64_rem(start, sparity->stripe_len, &offset);
offset = div_u64(offset, sectorsize);
nsectors = (int)len / sectorsize;
if (offset + nsectors <= sparity->nsectors) {
@ -2555,7 +2811,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
/* one ref inside this function, plus one for each page added to
* a bio later on */
atomic_set(&sblock->refs, 1);
refcount_set(&sblock->refs, 1);
sblock->sctx = sctx;
sblock->no_io_error_seen = 1;
sblock->sparity = sparity;
@ -2694,7 +2950,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
for (i = 0; i < nr_data_stripes(map); i++) {
*offset = last_offset + i * map->stripe_len;
stripe_nr = div_u64(*offset, map->stripe_len);
stripe_nr = div64_u64(*offset, map->stripe_len);
stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
/* Work out the disk rotation on this stripe-set */
@ -2765,7 +3021,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct bio *bio;
struct btrfs_raid_bio *rbio;
struct scrub_page *spage;
struct btrfs_bio *bbio = NULL;
u64 length;
int ret;
@ -2775,8 +3030,10 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
goto out;
length = sparity->logic_end - sparity->logic_start;
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
&length, &bbio, 0, 1);
&length, &bbio);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
@ -2795,9 +3052,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
if (!rbio)
goto rbio_out;
list_for_each_entry(spage, &sparity->spages, list)
raid56_add_scrub_pages(rbio, spage->page, spage->logical);
scrub_pending_bio_inc(sctx);
raid56_parity_submit_scrub_rbio(rbio);
return;
@ -2805,6 +3059,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
rbio_out:
bio_put(bio);
bbio_out:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(bbio);
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
@ -2822,12 +3077,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
static void scrub_parity_get(struct scrub_parity *sparity)
{
atomic_inc(&sparity->refs);
refcount_inc(&sparity->refs);
}
static void scrub_parity_put(struct scrub_parity *sparity)
{
if (!atomic_dec_and_test(&sparity->refs))
if (!refcount_dec_and_test(&sparity->refs))
return;
scrub_parity_check_and_repair(sparity);
@ -2879,7 +3134,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
sparity->scrub_dev = sdev;
sparity->logic_start = logic_start;
sparity->logic_end = logic_end;
atomic_set(&sparity->refs, 1);
refcount_set(&sparity->refs, 1);
INIT_LIST_HEAD(&sparity->spages);
sparity->dbitmap = sparity->bitmap;
sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@ -3098,7 +3353,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
physical = map->stripes[num].physical;
offset = 0;
nstripes = div_u64(length, map->stripe_len);
nstripes = div64_u64(length, map->stripe_len);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
offset = map->stripe_len * num;
increment = map->stripe_len * map->num_stripes;

View File

@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
while (key.offset < ekey->offset + left_len) {
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
if (right_type != BTRFS_FILE_EXTENT_REG) {
if (right_type != BTRFS_FILE_EXTENT_REG &&
right_type != BTRFS_FILE_EXTENT_INLINE) {
ret = 0;
goto out;
}
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
right_len = btrfs_file_extent_num_bytes(eb, ei);
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
right_len = btrfs_file_extent_inline_len(eb, slot, ei);
right_len = PAGE_ALIGN(right_len);
} else {
right_len = btrfs_file_extent_num_bytes(eb, ei);
}
right_offset = btrfs_file_extent_offset(eb, ei);
right_gen = btrfs_file_extent_generation(eb, ei);
@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
goto out;
}
/*
* We just wanted to see if when we have an inline extent, what
* follows it is a regular extent (wanted to check the above
* condition for inline extents too). This should normally not
* happen but it's possible for example when we have an inline
* compressed extent representing data with a size matching
* the page size (currently the same as sector size).
*/
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
ret = 0;
goto out;
}
left_offset_fixed = left_offset;
if (key.offset < ekey->offset) {
/* Fix the right offset for 2a and 7. */

View File

@ -1795,8 +1795,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(*flags & MS_RDONLY)) {
fs_info->num_tolerated_disk_barrier_failures) {
btrfs_warn(fs_info,
"too many missing devices, writeable remount is not allowed");
ret = -EACCES;

View File

@ -237,7 +237,6 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
{
memset(trans, 0, sizeof(*trans));
trans->transid = 1;
INIT_LIST_HEAD(&trans->qgroup_ref_list);
trans->type = __TRANS_DUMMY;
}

View File

@ -60,8 +60,8 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
WARN_ON(refcount_read(&transaction->use_count) == 0);
if (refcount_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
if (transaction->delayed_refs.pending_csums)
@ -207,7 +207,7 @@ loop:
spin_unlock(&fs_info->trans_lock);
return -EBUSY;
}
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers);
extwriter_counter_inc(cur_trans, type);
spin_unlock(&fs_info->trans_lock);
@ -257,7 +257,7 @@ loop:
* One for this trans handle, one so it will live on until we
* commit the transaction.
*/
atomic_set(&cur_trans->use_count, 2);
refcount_set(&cur_trans->use_count, 2);
atomic_set(&cur_trans->pending_ordered, 0);
cur_trans->flags = 0;
cur_trans->start_time = get_seconds();
@ -432,7 +432,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->trans_lock);
cur_trans = fs_info->running_transaction;
if (cur_trans && is_transaction_blocked(cur_trans)) {
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
spin_unlock(&fs_info->trans_lock);
wait_event(fs_info->transaction_wait,
@ -572,7 +572,6 @@ again:
h->type = type;
h->can_flush_pending_bgs = true;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
smp_mb();
@ -744,7 +743,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
list_for_each_entry(t, &fs_info->trans_list, list) {
if (t->transid == transid) {
cur_trans = t;
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
ret = 0;
break;
}
@ -773,7 +772,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
if (t->state == TRANS_STATE_COMPLETED)
break;
cur_trans = t;
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
break;
}
}
@ -917,7 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
wake_up_process(info->transaction_kthread);
err = -EIO;
}
assert_qgroups_uptodate(trans);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (must_run_delayed_refs) {
@ -1839,7 +1837,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
/* take transaction reference */
cur_trans = trans->transaction;
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
btrfs_end_transaction(trans);
@ -2015,7 +2013,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
spin_lock(&fs_info->trans_lock);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
spin_unlock(&fs_info->trans_lock);
atomic_inc(&cur_trans->use_count);
refcount_inc(&cur_trans->use_count);
ret = btrfs_end_transaction(trans);
wait_for_commit(cur_trans);
@ -2035,7 +2033,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
prev_trans = list_entry(cur_trans->list.prev,
struct btrfs_transaction, list);
if (prev_trans->state != TRANS_STATE_COMPLETED) {
atomic_inc(&prev_trans->use_count);
refcount_inc(&prev_trans->use_count);
spin_unlock(&fs_info->trans_lock);
wait_for_commit(prev_trans);
@ -2130,13 +2128,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
/* Reocrd old roots for later qgroup accounting */
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
if (ret) {
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
/*
* make sure none of the code above managed to slip in a
* delayed item
@ -2178,6 +2169,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
btrfs_free_log_root_tree(trans, fs_info);
/*
* commit_fs_roots() can call btrfs_save_ino_cache(), which generates
* new delayed refs. Must handle them or qgroup can be wrong.
*/
ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&fs_info->reloc_mutex);
goto scrub_continue;
}
/*
* Since fs roots are all committed, we can get a quite accurate
* new_roots. So let's do quota accounting.
@ -2223,7 +2232,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
switch_commit_roots(cur_trans, fs_info);
assert_qgroups_uptodate(trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
ASSERT(list_empty(&cur_trans->io_bgs));
update_super_roots(fs_info);

View File

@ -18,6 +18,8 @@
#ifndef __BTRFS_TRANSACTION__
#define __BTRFS_TRANSACTION__
#include <linux/refcount.h>
#include "btrfs_inode.h"
#include "delayed-ref.h"
#include "ctree.h"
@ -49,7 +51,7 @@ struct btrfs_transaction {
* transaction can end
*/
atomic_t num_writers;
atomic_t use_count;
refcount_t use_count;
atomic_t pending_ordered;
unsigned long flags;
@ -125,8 +127,6 @@ struct btrfs_trans_handle {
unsigned int type;
struct btrfs_root *root;
struct btrfs_fs_info *fs_info;
struct seq_list delayed_ref_elem;
struct list_head qgroup_ref_list;
struct list_head new_bgs;
};

View File

@ -4196,7 +4196,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
/* Need a ref to keep it from getting evicted from cache */
atomic_inc(&em->refs);
refcount_inc(&em->refs);
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
list_add_tail(&em->list, &extents);
num++;

File diff suppressed because it is too large Load Diff

View File

@ -123,7 +123,6 @@ struct btrfs_device {
struct list_head resized_list;
/* for sending down flush barriers */
int nobarriers;
struct bio *flush_bio;
struct completion flush_wait;
@ -298,7 +297,7 @@ struct btrfs_bio;
typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
struct btrfs_bio {
atomic_t refs;
refcount_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
u64 map_type; /* get from map_lookup->type */
@ -400,8 +399,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_bio **bbio_ret, int mirror_num);
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
int need_raid_map);
struct btrfs_bio **bbio_ret);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len);
@ -475,7 +473,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len, int mirror_num);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
struct btrfs_mapping_tree *map_tree,

View File

@ -12,6 +12,7 @@ struct btrfs_root;
struct btrfs_fs_info;
struct btrfs_inode;
struct extent_map;
struct btrfs_file_extent_item;
struct btrfs_ordered_extent;
struct btrfs_delayed_ref_node;
struct btrfs_delayed_tree_ref;
@ -24,6 +25,7 @@ struct extent_buffer;
struct btrfs_work;
struct __btrfs_workqueue;
struct btrfs_qgroup_extent_record;
struct btrfs_qgroup;
#define show_ref_type(type) \
__print_symbolic(type, \
@ -54,6 +56,12 @@ struct btrfs_qgroup_extent_record;
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
#define show_fi_type(type) \
__print_symbolic(type, \
{ BTRFS_FILE_EXTENT_INLINE, "INLINE" }, \
{ BTRFS_FILE_EXTENT_REG, "REG" }, \
{ BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"})
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
{ BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
@ -213,7 +221,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
__entry->block_start = map->block_start;
__entry->block_len = map->block_len;
__entry->flags = map->flags;
__entry->refs = atomic_read(&map->refs);
__entry->refs = refcount_read(&map->refs);
__entry->compress_type = map->compress_type;
),
@ -232,6 +240,138 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
__entry->refs, __entry->compress_type)
);
/* file extent item */
DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start),
TP_STRUCT__entry_btrfs(
__field( u64, root_obj )
__field( u64, ino )
__field( loff_t, isize )
__field( u64, disk_isize )
__field( u64, num_bytes )
__field( u64, ram_bytes )
__field( u64, disk_bytenr )
__field( u64, disk_num_bytes )
__field( u64, extent_offset )
__field( u8, extent_type )
__field( u8, compression )
__field( u64, extent_start )
__field( u64, extent_end )
),
TP_fast_assign_btrfs(bi->root->fs_info,
__entry->root_obj = bi->root->objectid;
__entry->ino = btrfs_ino(bi);
__entry->isize = bi->vfs_inode.i_size;
__entry->disk_isize = bi->disk_i_size;
__entry->num_bytes = btrfs_file_extent_num_bytes(l, fi);
__entry->ram_bytes = btrfs_file_extent_ram_bytes(l, fi);
__entry->disk_bytenr = btrfs_file_extent_disk_bytenr(l, fi);
__entry->disk_num_bytes = btrfs_file_extent_disk_num_bytes(l, fi);
__entry->extent_offset = btrfs_file_extent_offset(l, fi);
__entry->extent_type = btrfs_file_extent_type(l, fi);
__entry->compression = btrfs_file_extent_compression(l, fi);
__entry->extent_start = start;
__entry->extent_end = (start + __entry->num_bytes);
),
TP_printk_btrfs(
"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
"file extent range=[%llu %llu] "
"(num_bytes=%llu ram_bytes=%llu disk_bytenr=%llu "
"disk_num_bytes=%llu extent_offset=%llu type=%s "
"compression=%u",
show_root_type(__entry->root_obj), __entry->ino,
__entry->isize,
__entry->disk_isize, __entry->extent_start,
__entry->extent_end, __entry->num_bytes, __entry->ram_bytes,
__entry->disk_bytenr, __entry->disk_num_bytes,
__entry->extent_offset, show_fi_type(__entry->extent_type),
__entry->compression)
);
DECLARE_EVENT_CLASS(
btrfs__file_extent_item_inline,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start),
TP_STRUCT__entry_btrfs(
__field( u64, root_obj )
__field( u64, ino )
__field( loff_t, isize )
__field( u64, disk_isize )
__field( u8, extent_type )
__field( u8, compression )
__field( u64, extent_start )
__field( u64, extent_end )
),
TP_fast_assign_btrfs(
bi->root->fs_info,
__entry->root_obj = bi->root->objectid;
__entry->ino = btrfs_ino(bi);
__entry->isize = bi->vfs_inode.i_size;
__entry->disk_isize = bi->disk_i_size;
__entry->extent_type = btrfs_file_extent_type(l, fi);
__entry->compression = btrfs_file_extent_compression(l, fi);
__entry->extent_start = start;
__entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi));
),
TP_printk_btrfs(
"root=%llu(%s) inode=%llu size=%llu disk_isize=%llu "
"file extent range=[%llu %llu] "
"extent_type=%s compression=%u",
show_root_type(__entry->root_obj), __entry->ino, __entry->isize,
__entry->disk_isize, __entry->extent_start,
__entry->extent_end, show_fi_type(__entry->extent_type),
__entry->compression)
);
DEFINE_EVENT(
btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start)
);
DEFINE_EVENT(
btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start)
);
DEFINE_EVENT(
btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start)
);
DEFINE_EVENT(
btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline,
TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start)
);
#define show_ordered_flags(flags) \
__print_flags(flags, "|", \
{ (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \
@ -275,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
__entry->bytes_left = ordered->bytes_left;
__entry->flags = ordered->flags;
__entry->compress_type = ordered->compress_type;
__entry->refs = atomic_read(&ordered->refs);
__entry->refs = refcount_read(&ordered->refs);
__entry->root_objectid =
BTRFS_I(inode)->root->root_key.objectid;
__entry->truncated_len = ordered->truncated_len;
@ -1475,6 +1615,49 @@ TRACE_EVENT(qgroup_update_counters,
__entry->cur_new_count)
);
TRACE_EVENT(qgroup_update_reserve,
TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
s64 diff),
TP_ARGS(fs_info, qgroup, diff),
TP_STRUCT__entry_btrfs(
__field( u64, qgid )
__field( u64, cur_reserved )
__field( s64, diff )
),
TP_fast_assign_btrfs(fs_info,
__entry->qgid = qgroup->qgroupid;
__entry->cur_reserved = qgroup->reserved;
__entry->diff = diff;
),
TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
__entry->qgid, __entry->cur_reserved, __entry->diff)
);
TRACE_EVENT(qgroup_meta_reserve,
TP_PROTO(struct btrfs_root *root, s64 diff),
TP_ARGS(root, diff),
TP_STRUCT__entry_btrfs(
__field( u64, refroot )
__field( s64, diff )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->refroot = root->objectid;
__entry->diff = diff;
),
TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
show_root_type(__entry->refroot), __entry->diff)
);
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */

View File

@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags {
struct btrfs_balance_args {
__u64 profiles;
union {
__le64 usage;
__u64 usage;
struct {
__le32 usage_min;
__le32 usage_max;
__u32 usage_min;
__u32 usage_max;
};
};
__u64 devid;
@ -324,8 +324,8 @@ struct btrfs_balance_args {
* Process chunks that cross stripes_min..stripes_max devices,
* BTRFS_BALANCE_ARGS_STRIPES_RANGE
*/
__le32 stripes_min;
__le32 stripes_max;
__u32 stripes_min;
__u32 stripes_max;
__u64 unused[6];
} __attribute__ ((__packed__));