alistair23-linux/fs/btrfs/ioctl.c
Yan Zheng 3bb1a1bc42 Btrfs: Remove offset field from struct btrfs_extent_ref
The offset field in struct btrfs_extent_ref records the position
inside file that file extent is referenced by. In the new back
reference system, tree leaves holding references to file extent
are recorded explicitly. We can scan these tree leaves very quickly, so the
offset field is not required.

This patch also makes the back reference system check the objectid
when extents are in deleting.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
2008-10-09 11:46:24 -04:00

789 lines
19 KiB
C

/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
#include <linux/version.h>
#include <linux/xattr.h>
#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
static noinline int create_subvol(struct btrfs_root *root, char *name,
int namelen)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
struct btrfs_root_item root_item;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root = root;
struct inode *dir;
int ret;
int err;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
unsigned long nr = 1;
ret = btrfs_check_free_space(root, 1, 0);
if (ret)
goto fail_commit;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
0, &objectid);
if (ret)
goto fail;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
objectid, trans->transid, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
}
btrfs_set_header_nritems(leaf, 0);
btrfs_set_header_level(leaf, 0);
btrfs_set_header_bytenr(leaf, leaf->start);
btrfs_set_header_generation(leaf, trans->transid);
btrfs_set_header_owner(leaf, objectid);
write_extent_buffer(leaf, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(leaf);
inode_item = &root_item.inode;
memset(inode_item, 0, sizeof(*inode_item));
inode_item->generation = cpu_to_le64(1);
inode_item->size = cpu_to_le64(3);
inode_item->nlink = cpu_to_le32(1);
inode_item->nbytes = cpu_to_le64(root->leafsize);
inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
btrfs_set_root_bytenr(&root_item, leaf->start);
btrfs_set_root_level(&root_item, 0);
btrfs_set_root_refs(&root_item, 1);
btrfs_set_root_used(&root_item, 0);
memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
root_item.drop_level = 0;
btrfs_tree_unlock(leaf);
free_extent_buffer(leaf);
leaf = NULL;
btrfs_set_root_dirid(&root_item, new_dirid);
key.objectid = objectid;
key.offset = 1;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
&root_item);
if (ret)
goto fail;
/*
* insert the directory item
*/
key.offset = (u64)-1;
dir = root->fs_info->sb->s_root->d_inode;
ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
name, namelen, dir->i_ino, &key,
BTRFS_FT_DIR, 0);
if (ret)
goto fail;
ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
name, namelen, objectid,
root->fs_info->sb->s_root->d_inode->i_ino, 0);
if (ret)
goto fail;
ret = btrfs_commit_transaction(trans, root);
if (ret)
goto fail_commit;
new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
BUG_ON(!new_root);
trans = btrfs_start_transaction(new_root, 1);
BUG_ON(!trans);
ret = btrfs_create_subvol_root(new_root, trans, new_dirid,
BTRFS_I(dir)->block_group);
if (ret)
goto fail;
/* Invalidate existing dcache entry for new subvolume. */
btrfs_invalidate_dcache_root(root, name, namelen);
fail:
nr = trans->blocks_used;
err = btrfs_commit_transaction(trans, new_root);
if (err && !ret)
ret = err;
fail_commit:
btrfs_btree_balance_dirty(root, nr);
return ret;
}
static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
{
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
int err;
unsigned long nr = 0;
if (!root->ref_cows)
return -EINVAL;
ret = btrfs_check_free_space(root, 1, 0);
if (ret)
goto fail_unlock;
pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot) {
ret = -ENOMEM;
goto fail_unlock;
}
pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
if (!pending_snapshot->name) {
ret = -ENOMEM;
kfree(pending_snapshot);
goto fail_unlock;
}
memcpy(pending_snapshot->name, name, namelen);
pending_snapshot->name[namelen] = '\0';
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
pending_snapshot->root = root;
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
ret = btrfs_update_inode(trans, root, root->inode);
err = btrfs_commit_transaction(trans, root);
fail_unlock:
btrfs_btree_balance_dirty(root, nr);
return ret;
}
int btrfs_defrag_file(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct page *page;
unsigned long last_index;
unsigned long ra_pages = root->fs_info->bdi.ra_pages;
unsigned long total_read = 0;
u64 page_start;
u64 page_end;
unsigned long i;
int ret;
ret = btrfs_check_free_space(root, inode->i_size, 0);
if (ret)
return -ENOSPC;
mutex_lock(&inode->i_mutex);
last_index = inode->i_size >> PAGE_CACHE_SHIFT;
for (i = 0; i <= last_index; i++) {
if (total_read % ra_pages == 0) {
btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
min(last_index, i + ra_pages - 1));
}
total_read++;
again:
page = grab_cache_page(inode->i_mapping, i);
if (!page)
goto out_unlock;
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
goto out_unlock;
}
}
wait_on_page_writeback(page);
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
lock_extent(io_tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
goto again;
}
set_page_extent_mapped(page);
/*
* this makes sure page_mkwrite is called on the
* page if it is dirtied again later
*/
clear_page_dirty_for_io(page);
btrfs_set_extent_delalloc(inode, page_start, page_end);
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
}
out_unlock:
mutex_unlock(&inode->i_mutex);
return 0;
}
/*
* Called inside transaction, so use GFP_NOFS
*/
static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
{
u64 new_size;
u64 old_size;
u64 devid = 1;
struct btrfs_ioctl_vol_args *vol_args;
struct btrfs_trans_handle *trans;
struct btrfs_device *device = NULL;
char *sizestr;
char *devstr = NULL;
int ret = 0;
int namelen;
int mod = 0;
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
return -ENOMEM;
if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
ret = -EFAULT;
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
devstr = strchr(sizestr, ':');
if (devstr) {
char *end;
sizestr = devstr + 1;
*devstr = '\0';
devstr = vol_args->name;
devid = simple_strtoull(devstr, &end, 10);
printk(KERN_INFO "resizing devid %llu\n", devid);
}
device = btrfs_find_device(root, devid, NULL);
if (!device) {
printk(KERN_INFO "resizer unable to find device %llu\n", devid);
ret = -EINVAL;
goto out_unlock;
}
if (!strcmp(sizestr, "max"))
new_size = device->bdev->bd_inode->i_size;
else {
if (sizestr[0] == '-') {
mod = -1;
sizestr++;
} else if (sizestr[0] == '+') {
mod = 1;
sizestr++;
}
new_size = btrfs_parse_size(sizestr);
if (new_size == 0) {
ret = -EINVAL;
goto out_unlock;
}
}
old_size = device->total_bytes;
if (mod < 0) {
if (new_size > old_size) {
ret = -EINVAL;
goto out_unlock;
}
new_size = old_size - new_size;
} else if (mod > 0) {
new_size = old_size + new_size;
}
if (new_size < 256 * 1024 * 1024) {
ret = -EINVAL;
goto out_unlock;
}
if (new_size > device->bdev->bd_inode->i_size) {
ret = -EFBIG;
goto out_unlock;
}
do_div(new_size, root->sectorsize);
new_size *= root->sectorsize;
printk(KERN_INFO "new size for %s is %llu\n",
device->name, (unsigned long long)new_size);
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 1);
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_shrink_device(device, new_size);
}
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
out:
kfree(vol_args);
return ret;
}
static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root,
void __user *arg)
{
struct btrfs_ioctl_vol_args *vol_args;
struct btrfs_dir_item *di;
struct btrfs_path *path;
u64 root_dirid;
int namelen;
int ret;
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
return -ENOMEM;
if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
ret = -EFAULT;
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
namelen = strlen(vol_args->name);
if (strchr(vol_args->name, '/')) {
ret = -EINVAL;
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
path, root_dirid,
vol_args->name, namelen, 0);
btrfs_free_path(path);
if (di && !IS_ERR(di)) {
ret = -EEXIST;
goto out;
}
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
if (root == root->fs_info->tree_root)
ret = create_subvol(root, vol_args->name, namelen);
else
ret = create_snapshot(root, vol_args->name, namelen);
out:
kfree(vol_args);
return ret;
}
static int btrfs_ioctl_defrag(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
switch (inode->i_mode & S_IFMT) {
case S_IFDIR:
btrfs_defrag_root(root, 0);
btrfs_defrag_root(root->fs_info->extent_root, 0);
break;
case S_IFREG:
btrfs_defrag_file(file);
break;
}
return 0;
}
long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_vol_args *vol_args;
int ret;
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
return -ENOMEM;
if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
ret = -EFAULT;
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
out:
kfree(vol_args);
return ret;
}
long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_vol_args *vol_args;
int ret;
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
return -ENOMEM;
if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
ret = -EFAULT;
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
out:
kfree(vol_args);
return ret;
}
long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct file *src_file;
struct inode *src;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct extent_buffer *leaf;
char *buf;
struct btrfs_key key;
u32 nritems;
int slot;
int ret;
src_file = fget(src_fd);
if (!src_file)
return -EBADF;
src = src_file->f_dentry->d_inode;
ret = -EISDIR;
if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
goto out_fput;
ret = -EXDEV;
if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
goto out_fput;
ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
if (!buf)
goto out_fput;
path = btrfs_alloc_path();
if (!path) {
vfree(buf);
goto out_fput;
}
path->reada = 2;
if (inode < src) {
mutex_lock(&inode->i_mutex);
mutex_lock(&src->i_mutex);
} else {
mutex_lock(&src->i_mutex);
mutex_lock(&inode->i_mutex);
}
ret = -ENOTEMPTY;
if (inode->i_size)
goto out_unlock;
/* do any pending delalloc/csum calc on src, one way or
another, and lock file content */
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
break;
unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
if (ordered)
btrfs_put_ordered_extent(ordered);
btrfs_wait_ordered_range(src, 0, (u64)-1);
}
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
key.objectid = src->i_ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
while (1) {
/*
* note the key will change type as we walk through the
* tree.
*/
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
if (ret < 0)
goto out;
nritems = btrfs_header_nritems(path->nodes[0]);
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
if (ret > 0)
break;
nritems = btrfs_header_nritems(path->nodes[0]);
}
leaf = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
key.objectid != src->i_ino)
break;
if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY ||
btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
u32 size;
struct btrfs_key new_key;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
btrfs_item_ptr_offset(leaf, slot),
size);
btrfs_release_path(root, path);
memcpy(&new_key, &key, sizeof(new_key));
new_key.objectid = inode->i_ino;
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret)
goto out;
leaf = path->nodes[0];
slot = path->slots[0];
write_extent_buffer(leaf, buf,
btrfs_item_ptr_offset(leaf, slot),
size);
btrfs_mark_buffer_dirty(leaf);
}
if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
struct btrfs_file_extent_item *extent;
int found_type;
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
found_type = btrfs_file_extent_type(leaf, extent);
if (found_type == BTRFS_FILE_EXTENT_REG) {
u64 ds = btrfs_file_extent_disk_bytenr(leaf,
extent);
u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
extent);
/* ds == 0 means there's a hole */
if (ds != 0) {
ret = btrfs_inc_extent_ref(trans, root,
ds, dl, leaf->start,
root->root_key.objectid,
trans->transid,
inode->i_ino);
BUG_ON(ret);
}
}
}
btrfs_release_path(root, path);
key.offset++;
}
ret = 0;
out:
btrfs_release_path(root, path);
if (ret == 0) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode_set_bytes(inode, inode_get_bytes(src));
btrfs_i_size_write(inode, src->i_size);
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
}
btrfs_end_transaction(trans, root);
unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
if (ret)
vmtruncate(inode, 0);
out_unlock:
mutex_unlock(&src->i_mutex);
mutex_unlock(&inode->i_mutex);
vfree(buf);
btrfs_free_path(path);
out_fput:
fput(src_file);
return ret;
}
/*
* there are many ways the trans_start and trans_end ioctls can lead
* to deadlocks. They should only be used by applications that
* basically own the machine, and have a very in depth understanding
* of all the possible deadlocks and enospc problems.
*/
long btrfs_ioctl_trans_start(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (file->private_data) {
ret = -EINPROGRESS;
goto out;
}
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans++;
mutex_unlock(&root->fs_info->trans_mutex);
trans = btrfs_start_ioctl_transaction(root, 0);
if (trans)
file->private_data = trans;
else
ret = -ENOMEM;
/*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
out:
return ret;
}
/*
* there are many ways the trans_start and trans_end ioctls can lead
* to deadlocks. They should only be used by applications that
* basically own the machine, and have a very in depth understanding
* of all the possible deadlocks and enospc problems.
*/
long btrfs_ioctl_trans_end(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret = 0;
trans = file->private_data;
if (!trans) {
ret = -EINVAL;
goto out;
}
btrfs_end_transaction(trans, root);
file->private_data = NULL;
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
mutex_unlock(&root->fs_info->trans_mutex);
out:
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
switch (cmd) {
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(root, (void __user *)arg);
case BTRFS_IOC_DEFRAG:
return btrfs_ioctl_defrag(file);
case BTRFS_IOC_RESIZE:
return btrfs_ioctl_resize(root, (void __user *)arg);
case BTRFS_IOC_ADD_DEV:
return btrfs_ioctl_add_dev(root, (void __user *)arg);
case BTRFS_IOC_RM_DEV:
return btrfs_ioctl_rm_dev(root, (void __user *)arg);
case BTRFS_IOC_BALANCE:
return btrfs_balance(root->fs_info->dev_root);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
return btrfs_ioctl_trans_end(file);
case BTRFS_IOC_SYNC:
btrfs_start_delalloc_inodes(root);
btrfs_sync_fs(file->f_dentry->d_sb, 1);
return 0;
}
return -ENOTTY;
}