/*
 * Patch summary: add a "pause" operation for the btrfs restriper (balance).
 *
 *  - struct btrfs_fs_info gains two atomic counters, balance_running and
 *    balance_pause_req, plus the wait queue balance_wait_q; open_ctree()
 *    initialises all three.
 *  - A new ioctl, BTRFS_IOC_BALANCE_CTL, with the single mode
 *    BTRFS_BALANCE_CTL_PAUSE calls btrfs_pause_balance().  The caller needs
 *    CAP_SYS_ADMIN; any other mode yields -EINVAL.
 *  - __btrfs_balance() checks balance_pause_req at the top of its search
 *    loop and leaves with -ECANCELED when it is set.
 *  - btrfs_balance() refuses to start while a pause request is pending, and
 *    on the way out calls __cancel_balance() only if no pause request is
 *    pending or if ret is an error other than -ECANCELED / -ENOSPC.  It then
 *    wakes balance_wait_q.
 *  - btrfs_pause_balance() raises balance_pause_req, sleeps until
 *    balance_running drops to zero, then lowers balance_pause_req again.
 *    It returns -ENOTCONN when there is no balance_ctl or balance is not
 *    running.
 *  - close_ctree() pauses balance before cancelling scrub, and
 *    free_fs_info() kfree()s whatever balance_ctl is still attached.
 *  - The ioctl balance args report BTRFS_BALANCE_STATE_RUNNING and
 *    BTRFS_BALANCE_STATE_PAUSE_REQ in ->state.
 */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 99eb2bcd9aa7..1afda75d5414 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1214,7 +1214,10 @@ struct btrfs_fs_info {
 	/* restriper state */
 	spinlock_t balance_lock;
 	struct mutex balance_mutex;
+	atomic_t balance_running;	/* raised around __btrfs_balance() */
+	atomic_t balance_pause_req;	/* raised while a pauser waits */
 	struct btrfs_balance_control *balance_ctl;
+	wait_queue_head_t balance_wait_q;	/* woken at end of btrfs_balance() */
 
 	unsigned data_chunk_allocations;
 	unsigned metadata_ratio;
@@ -2658,6 +2661,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
 }
 static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 {
+	kfree(fs_info->balance_ctl);	/* left over if balance stayed paused */
 	kfree(fs_info->delayed_root);
 	kfree(fs_info->extent_root);
 	kfree(fs_info->tree_root);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index eb7a11ac5b73..8ce837407800 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2004,7 +2004,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	spin_lock_init(&fs_info->balance_lock);
 	mutex_init(&fs_info->balance_mutex);
+	atomic_set(&fs_info->balance_running, 0);
+	atomic_set(&fs_info->balance_pause_req, 0);
 	fs_info->balance_ctl = NULL;
+	init_waitqueue_head(&fs_info->balance_wait_q);
 
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
@@ -2980,6 +2983,9 @@ int close_ctree(struct btrfs_root *root)
 	fs_info->closing = 1;
 	smp_mb();
 
+	/* pause restriper - we want to resume on mount */
+	btrfs_pause_balance(root->fs_info);
+
 	btrfs_scrub_cancel(root);
 
 	/* wait for any defraggers to finish */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 29b3a94933f0..f572c53dda4f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3072,6 +3072,11 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 
 	bargs->flags = bctl->flags;
 
+	if (atomic_read(&fs_info->balance_running))
+		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
+	if (atomic_read(&fs_info->balance_pause_req))
+		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
+
 	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
 	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
 	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
@@ -3103,6 +3108,11 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
 		bargs = NULL;
 	}
 
+	if (fs_info->balance_ctl) {
+		ret = -EINPROGRESS;	/* a balance_ctl is already attached */
+		goto out_bargs;
+	}
+
 	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
 	if (!bctl) {
 		ret = -ENOMEM;
@@ -3123,7 +3133,8 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
 
 	ret = btrfs_balance(bctl, bargs);
 	/*
-	 * bctl is freed in __cancel_balance
+	 * bctl is freed in __cancel_balance or in free_fs_info if
+	 * restriper was paused all the way until unmount
 	 */
 	if (arg) {
 		if (copy_to_user(arg, bargs, sizeof(*bargs)))
@@ -3138,6 +3149,19 @@ out:
 	return ret;
 }
 
+static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case BTRFS_BALANCE_CTL_PAUSE:
+		return btrfs_pause_balance(root->fs_info);
+	}
+
+	return -EINVAL;	/* cmd is not a known balance control mode */
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
@@ -3216,6 +3240,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_scrub_progress(root, argp);
 	case BTRFS_IOC_BALANCE_V2:
 		return btrfs_ioctl_balance(root, argp);
+	case BTRFS_IOC_BALANCE_CTL:
+		return btrfs_ioctl_balance_ctl(root, arg);
 	}
 
 	return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index c8b37d2c0d77..e972e11a8d77 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -109,6 +109,9 @@ struct btrfs_ioctl_fs_info_args {
 	__u64 reserved[124];			/* pad to 1k */
 };
 
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE		1
+
 /*
  * this is packed, because it should be exactly the same as its disk
  * byte order counterpart (struct btrfs_disk_balance_args)
@@ -137,6 +140,9 @@ struct btrfs_balance_progress {
 	__u64 completed;	/* # of chunks relocated so far */
 };
 
+#define BTRFS_BALANCE_STATE_RUNNING	(1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ	(1ULL << 1)
+
 struct btrfs_ioctl_balance_args {
 	__u64 flags;				/* in/out */
 	__u64 state;				/* out */
@@ -315,6 +321,7 @@ struct btrfs_ioctl_logical_ino_args {
 				    struct btrfs_ioctl_fs_info_args)
 #define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
 				   struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
 #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
 					struct btrfs_ioctl_ino_path_args)
 #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0160607e6e2..d32660ce753d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2492,6 +2492,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 
 	while (1) {
+		if (atomic_read(&fs_info->balance_pause_req)) {
+			ret = -ECANCELED;	/* pause requested */
+			goto error;
+		}
+
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
 		if (ret < 0)
 			goto error;
@@ -2553,6 +2558,11 @@ error:
 	return ret;
 }
 
+static inline int balance_need_close(struct btrfs_fs_info *fs_info)
+{
+	return atomic_read(&fs_info->balance_pause_req) == 0;
+}
+
 static void __cancel_balance(struct btrfs_fs_info *fs_info)
 {
 	int ret;
@@ -2575,7 +2585,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 	u64 allowed;
 	int ret;
 
-	if (btrfs_fs_closing(fs_info)) {
+	if (btrfs_fs_closing(fs_info) ||
+	    atomic_read(&fs_info->balance_pause_req)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -2680,18 +2691,25 @@ do_balance:
 		spin_unlock(&fs_info->balance_lock);
 	}
 
+	atomic_inc(&fs_info->balance_running);
 	mutex_unlock(&fs_info->balance_mutex);
 
 	ret = __btrfs_balance(fs_info);
 
 	mutex_lock(&fs_info->balance_mutex);
+	atomic_dec(&fs_info->balance_running);
 
 	if (bargs) {
 		memset(bargs, 0, sizeof(*bargs));
 		update_ioctl_balance_args(fs_info, bargs);
 	}
 
-	__cancel_balance(fs_info);
+	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+	    balance_need_close(fs_info)) {
+		__cancel_balance(fs_info);
+	}
+
+	wake_up(&fs_info->balance_wait_q);	/* unblock btrfs_pause_balance() */
 
 	return ret;
 out:
@@ -2785,6 +2803,35 @@ out:
 	return ret;
 }
 
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
+{
+	int ret = 0;
+
+	mutex_lock(&fs_info->balance_mutex);
+	if (!fs_info->balance_ctl) {
+		mutex_unlock(&fs_info->balance_mutex);
+		return -ENOTCONN;	/* no balance_ctl attached */
+	}
+
+	if (atomic_read(&fs_info->balance_running)) {
+		atomic_inc(&fs_info->balance_pause_req);
+		mutex_unlock(&fs_info->balance_mutex);
+
+		wait_event(fs_info->balance_wait_q,
+			   atomic_read(&fs_info->balance_running) == 0);
+
+		mutex_lock(&fs_info->balance_mutex);
+		/* balance_ctl may be gone by now; it is not touched below */
+		BUG_ON(atomic_read(&fs_info->balance_running));
+		atomic_dec(&fs_info->balance_pause_req);
+	} else {
+		ret = -ENOTCONN;	/* balance_ctl attached but not running */
+	}
+
+	mutex_unlock(&fs_info->balance_mutex);
+	return ret;
+}
+
 /*
  * shrinking a device means finding all of the device extents past
  * the new size, and then following the back refs to the chunks.
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index cd25ea58ec35..80953afb12b9 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -273,6 +273,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs);
 int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
 			 struct btrfs_device *device, u64 num_bytes,