From fb4b10e5d56639389fbc46dc8a87e81578af0b64 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Jan 2016 17:28:38 -0500 Subject: [PATCH 1/4] Btrfs: change how we update the global block rsv I'm writing a tool to visualize the enospc system in order to help debug enospc bugs and I found weird data and ran it down to when we update the global block rsv. We add all of the remaining free space to the block rsv, do a trace event, then remove the extra and do another trace event. This makes my visualization look silly and is unintuitive code as well. Fix this stuff to only add the amount we are missing, or free the amount we are missing. This is less clean to read but more explicit in what it is doing, as well as only emitting events for values that make sense. Thanks, Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2287c7c10be..07f47a5d0d6f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->size = min_t(u64, num_bytes, SZ_512M); - num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + - sinfo->bytes_reserved + sinfo->bytes_readonly + - sinfo->bytes_may_use; - - if (sinfo->total_bytes > num_bytes) { - num_bytes = sinfo->total_bytes - num_bytes; - block_rsv->reserved += num_bytes; - sinfo->bytes_may_use += num_bytes; - trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, 1); - } - - if (block_rsv->reserved >= block_rsv->size) { + if (block_rsv->reserved < block_rsv->size) { + num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + + sinfo->bytes_reserved + sinfo->bytes_readonly + + sinfo->bytes_may_use; + if (sinfo->total_bytes > num_bytes) { + num_bytes = sinfo->total_bytes - num_bytes; + num_bytes = min(num_bytes, + block_rsv->size - block_rsv->reserved); + block_rsv->reserved += num_bytes; + sinfo->bytes_may_use += num_bytes; + trace_btrfs_space_reservation(fs_info, "space_info", + sinfo->flags, num_bytes, + 1); + } + } else if (block_rsv->reserved > block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", sinfo->flags, num_bytes, 0); block_rsv->reserved = block_rsv->size; - block_rsv->full = 1; } + if (block_rsv->reserved == block_rsv->size) + block_rsv->full = 1; + else + block_rsv->full = 0; + spin_unlock(&block_rsv->lock); spin_unlock(&sinfo->lock); } From dc95f7bfc57fa4b75a77d0da84d5db249d74aa3f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Jan 2016 11:48:06 -0500 Subject: [PATCH 2/4] Btrfs: fix truncate_space_check truncate_space_check is using btrfs_csum_bytes_to_leaves() but forgetting to multiply by nodesize so we get an actual byte count. We need a tracepoint here so that we have the matching reserve for the release that will come later. Also add a comment to make clear what the intent of truncate_space_check is. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f06eb1f4384..9b4a5ab279dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4211,11 +4211,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, { int ret; + /* + * This is only used to apply pressure to the enospc system, we don't + * intend to use this reservation at all. + */ bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); + bytes_deleted *= root->nodesize; ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, bytes_deleted, BTRFS_RESERVE_NO_FLUSH); - if (!ret) + if (!ret) { + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, + bytes_deleted, 1); trans->bytes_reserved += bytes_deleted; + } return ret; } From 88d3a5aaf6171d9a222961837ba329b850f140e3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Jan 2016 13:21:20 -0500 Subject: [PATCH 3/4] Btrfs: add transaction space reservation tracepoints There are a few places where we add to trans->bytes_reserved but don't have the corresponding trace point. With these added my tool no longer sees transaction leaks. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6031ce474f7..e9e95ef0644f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( trans->block_rsv = &root->fs_info->trans_block_rsv; trans->bytes_reserved = num_bytes; + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, num_bytes, 1); return trans; } @@ -1375,7 +1377,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, rsv = trans->block_rsv; trans->block_rsv = &pending->block_rsv; trans->bytes_reserved = trans->block_rsv->reserved; - + trace_btrfs_space_reservation(root->fs_info, "transaction", + trans->transid, + trans->bytes_reserved, 1); dentry = pending->dentry; parent_inode = pending->dir; parent_root = BTRFS_I(parent_inode)->root; From baee8790641eac0c0fcb2524a925aec39d9be6e0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Jan 2016 09:35:38 -0500 Subject: [PATCH 4/4] Btrfs: check reserved when deciding to background flush We will sometimes start background flushing the various enospc related things (delayed nodes, delalloc, etc) if we are getting close to reserving all of our available space. We don't want to do this however when we are actually using this space as it causes unneeded thrashing. We currently try to do this by checking bytes_used >= thresh, but bytes_used is only part of the equation, we need to use bytes_reserved as well as this represents space that is very likely to become bytes_used in the future. My tracing tool will keep count of the number of times we kick off the async flusher, the following are counts for the entire run of generic/027 No Patch Patch avg: 5385 5009 median: 5500 4916 We skewed lower than the average with my patch and higher than the average with the patch, overall it cuts the flushing from anywhere from 5-10%, which in the case of actual ENOSPC is quite helpful. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 07f47a5d0d6f..aa31fe97c47a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, u64 thresh = div_factor_fine(space_info->total_bytes, 98); /* If we're just plain full then async reclaim just slows us down. */ - if (space_info->bytes_used >= thresh) + if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) return 0; return (used >= thresh && !btrfs_fs_closing(fs_info) &&