From c969f58ca43fc403c75f5d3da4cf1e21de7afaa0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Apr 2009 14:13:01 +0100 Subject: [PATCH 01/25] GFS2: Update the rw flags After Jens recent updates: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=a1f242524c3c1f5d40f1c9c343427e34d1aadd6e et al. this is a patch to bring gfs2 uptodate with the core code. Also I've managed to squash another call to ll_rw_block() along the way. There is still one part of the GFS2 I/O paths which are not correctly annotated and that is due to the sharing of the writeback code between the data and metadata address spaces. I would like to change that too, but this patch is still worth doing on its own, I think. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 6 +++--- fs/gfs2/lops.c | 14 ++++++++------ fs/gfs2/meta_io.c | 38 +++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 98918a756410..aa62cf5976e8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -120,7 +120,7 @@ __acquires(&sdp->sd_log_lock) lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); @@ -604,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) goto skip_barrier; get_bh(bh); - submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); + submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -664,7 +664,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) lock_buffer(bh); if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 80e4f5f898bb..00315f50fa46 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -189,7 +191,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); n = 0; @@ -199,7 +201,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); if (++n >= num) break; @@ -341,7 +343,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); bh = gfs2_log_get_buf(sdp); mh = (struct gfs2_meta_header *)bh->b_data; @@ -358,7 +360,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, @@ -560,7 +562,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, ptr = bh_log_ptr(bh); get_bh(bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); while(!list_empty(list)) { bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); @@ -586,7 +588,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, } else { bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - submit_bh(WRITE, bh1); + submit_bh(WRITE_SYNC_PLUG, bh1); gfs2_log_lock(sdp); ptr += 2; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8d6f13256b26..75b2aec06f85 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -201,16 +201,32 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - *bhp = gfs2_getbuf(gl, blkno, CREATE); - if (!buffer_uptodate(*bhp)) { - ll_rw_block(READ_META, 1, bhp); - if (flags & DIO_WAIT) { - int error = gfs2_meta_wait(gl->gl_sbd, *bhp); - if (error) { - brelse(*bhp); - return error; - } - } + struct gfs2_sbd *sdp = gl->gl_sbd; + struct buffer_head *bh; + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); + + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + return 0; + } + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); + if (!(flags & DIO_WAIT)) + return 0; + + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) { + struct gfs2_trans *tr = current->journal_info; + if (tr && tr->tr_touched) + gfs2_io_error_bh(sdp, bh); + brelse(bh); + return -EIO; } return 0; @@ -404,7 +420,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); dblock++; extlen--; From 4a0f9a321a113392b448e477018311d14fba2b34 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 20 Apr 2009 08:16:26 +0100 Subject: [PATCH 02/25] GFS2: Optimise writepage for metadata This adds a GFS2 specific writepage for metadata, rather than continuing to use the VFS function. As a result we now tag all our metadata I/O with the correct flag so that blktraces will now be less confusing. Also, the generic function was checking for a number of corner cases which cannot happen on the metadata address spaces so that this should be faster too. Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 66 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 75b2aec06f85..78a5f4312667 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -33,17 +33,65 @@ #include "util.h" #include "ops_address.h" -static int aspace_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) +static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { - gfs2_assert_warn(inode->i_sb->s_fs_info, 0); - return -EOPNOTSUPP; -} + int err; + struct buffer_head *bh, *head; + int nr_underway = 0; + int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE)); -static int gfs2_aspace_writepage(struct page *page, - struct writeback_control *wbc) -{ - return block_write_full_page(page, aspace_get_block, wbc); + BUG_ON(!PageLocked(page)); + BUG_ON(!page_has_buffers(page)); + + head = page_buffers(page); + bh = head; + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (!trylock_buffer(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh)) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(write_op, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + + err = 0; + if (nr_underway == 0) + end_page_writeback(page); + + return err; } static const struct address_space_operations aspace_aops = { From 48bf2b1711dc498494e77705c415ee46bb508fd9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 29 Apr 2009 13:59:35 +0100 Subject: [PATCH 03/25] GFS2: Something nonlinear this way comes! For some reason GFS2 has been missing support for non-linear mappings. This patch fixes that, and also avoids taking any locks for mmap in the O_NOATIME case. In fact we don't actually need to take the lock here at all - just doing file_accessed() would be enough, but we have to take the lock eventually and this helps it hit disk (and thus be seen by other nodes) faster. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 5d82e91887e3..0ee7bd287c5a 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -425,33 +425,36 @@ static struct vm_operations_struct gfs2_vm_ops = { .page_mkwrite = gfs2_page_mkwrite, }; - /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * - * Returns: 0 or error code + * There is no need to get a lock here unless we should be updating + * atime. We ignore any locking errors since the only consequence is + * a missed atime update (which will just be deferred until later). + * + * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - return error; + if (!(file->f_flags & O_NOATIME)) { + struct gfs2_holder i_gh; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + file_accessed(file); + if (error == 0) + gfs2_glock_dq_uninit(&i_gh); } - vma->vm_ops = &gfs2_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; - gfs2_glock_dq_uninit(&i_gh); - - return error; + return 0; } /** From 7537d81aa7b7cd31b0caeac8091456e93d96fa8d Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Tue, 12 May 2009 11:16:20 -0500 Subject: [PATCH 04/25] GFS2: Fix timestamps on write This patch copies the timestamps from the vfs inode into gfs2 and syncs it to the disk inode during writes. Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index a6dde1751e17..e5664210f0d8 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -781,10 +781,12 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, unlock_page(page); page_cache_release(page); - if (inode->i_size < to) { - i_size_write(inode, to); - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (copied) { + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_disksize = inode->i_size; + } + gfs2_dinode_out(ip, di); mark_inode_dirty(inode); } @@ -824,7 +826,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; struct gfs2_alloc *al = ip->i_alloc; - struct gfs2_dinode *di; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; @@ -847,11 +848,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) { - di = (struct gfs2_dinode *)dibh->b_data; - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (ret > 0) { + if (inode->i_size > ip->i_disksize) + ip->i_disksize = inode->i_size; + gfs2_dinode_out(ip, dibh->b_data); mark_inode_dirty(inode); } From a1c0643ff9f360a30644f6e3cd643ca2a5083aea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 10:56:52 +0100 Subject: [PATCH 05/25] GFS2: Move journal live test at transaction start There seems little point grabbing the transaction glock only to have to release it again if the journal isn't live. This moves the test earlier to avoid grabbing the lock when we don't need it in the first place. Signed-off-by: Steven Whitehouse --- fs/gfs2/trans.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 053752d4b27f..4ef0e9fa3549 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -33,6 +33,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, BUG_ON(current->journal_info); BUG_ON(blocks == 0 && revokes == 0); + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); if (!tr) return -ENOMEM; @@ -54,12 +57,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (error) goto fail_holder_uninit; - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - tr->tr_t_gh.gh_flags |= GL_NOCACHE; - error = -EROFS; - goto fail_gunlock; - } - error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) goto fail_gunlock; From 48c2b613616235d7c97fda5982f50100a6c79166 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 14:49:48 +0100 Subject: [PATCH 06/25] GFS2: Add commit= mount option It has always been possible to adjust the gfs2 log commit interval, but only from the sysfs interface. This adds a mount option, commit=, which will be familar to ext3 users. The sysfs interface continues to be available as well, although this might be removed in the future. Also this patch cleans up some duplicated structures in the GFS2 sysfs code. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/mount.c | 10 +++++ fs/gfs2/ops_fstype.c | 4 +- fs/gfs2/ops_super.c | 13 ++++++- fs/gfs2/sys.c | 92 ++++++++++++++++---------------------------- 5 files changed, 60 insertions(+), 60 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 399d1b978049..65f438e9537a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -418,6 +418,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ + int ar_commit; /* Commit interval */ }; struct gfs2_tune { diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index f7e8527a21e0..947af151fa24 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -45,6 +45,7 @@ enum { Opt_meta, Opt_discard, Opt_nodiscard, + Opt_commit, Opt_err, }; @@ -73,6 +74,7 @@ static const match_table_t tokens = { {Opt_meta, "meta"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, + {Opt_commit, "commit=%d"}, {Opt_err, NULL} }; @@ -89,6 +91,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) char *o; int token; substring_t tmp[MAX_OPT_ARGS]; + int rv; /* Split the options into tokens with the "," character and process them */ @@ -173,6 +176,13 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_nodiscard: args->ar_discard = 0; break; + case Opt_commit: + rv = match_int(&tmp[0], &args->ar_commit); + if (rv || args->ar_commit <= 0) { + fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; case Opt_err: default: fs_info(sdp, "invalid mount option: %s\n", o); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea753..7981fbc9fc3b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -55,7 +55,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; @@ -1165,6 +1164,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; + sdp->sd_args.ar_commit = 60; error = gfs2_mount_args(sdp, &sdp->sd_args, data); if (error) { @@ -1191,6 +1191,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + error = init_names(sdp, silent); if (error) goto fail; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 458019569dcb..0677a8378560 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -436,8 +436,12 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_args args = sdp->sd_args; /* Default to current settings */ + struct gfs2_tune *gt = &sdp->sd_tune; int error; + spin_lock(>->gt_spin); + args.ar_commit = gt->gt_log_flush_secs; + spin_unlock(>->gt_spin); error = gfs2_mount_args(sdp, &args, data); if (error) return error; @@ -473,6 +477,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + spin_lock(>->gt_spin); + gt->gt_log_flush_secs = args.ar_commit; + spin_unlock(>->gt_spin); + return 0; } @@ -550,6 +558,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; + int lfsecs; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -610,7 +619,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - + lfsecs = sdp->sd_tune.gt_log_flush_secs; + if (lfsecs != 60) + seq_printf(s, ",commit=%d", lfsecs); return 0; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7655f5025fec..d53b22edc980 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -26,6 +26,36 @@ #include "util.h" #include "glops.h" +struct gfs2_attr { + struct attribute attr; + ssize_t (*show)(struct gfs2_sbd *, char *); + ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); +}; + +static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->show ? a->show(sdp, buf) : 0; +} + +static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->store ? a->store(sdp, buf, len) : len; +} + +static struct sysfs_ops gfs2_attr_ops = { + .show = gfs2_attr_show, + .store = gfs2_attr_store, +}; + + +static struct kset *gfs2_kset; + static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { return snprintf(buf, PAGE_SIZE, "%u:%u\n", @@ -212,11 +242,6 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len return len; } -struct gfs2_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; #define GFS2_ATTR(name, mode, show, store) \ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) @@ -246,49 +271,21 @@ static struct attribute *gfs2_attrs[] = { NULL, }; -static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->show ? a->show(sdp, buf) : 0; -} - -static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->store ? a->store(sdp, buf, len) : len; -} - -static struct sysfs_ops gfs2_attr_ops = { - .show = gfs2_attr_show, - .store = gfs2_attr_store, -}; - static struct kobj_type gfs2_ktype = { .default_attrs = gfs2_attrs, .sysfs_ops = &gfs2_attr_ops, }; -static struct kset *gfs2_kset; - /* * display struct lm_lockstruct fields */ -struct lockstruct_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define LOCKSTRUCT_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ } \ -static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name) +static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name) LOCKSTRUCT_ATTR(jid, "%u\n"); LOCKSTRUCT_ATTR(first, "%u\n"); @@ -401,14 +398,8 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_recover_jid_status); } -struct gdlm_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *sdp, char *); - ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t); -}; - #define GDLM_ATTR(_name,_mode,_show,_store) \ -static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) +static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); @@ -434,21 +425,12 @@ static struct attribute *lock_module_attrs[] = { NULL, }; -/* - * display struct gfs2_args fields - */ - -struct args_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define ARGS_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ } \ -static struct args_attr args_attr_##name = __ATTR_RO(name) +static struct gfs2_attr args_attr_##name = __ATTR_RO(name) ARGS_ATTR(lockproto, "%s\n"); ARGS_ATTR(locktable, "%s\n"); @@ -531,14 +513,8 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, return len; } -struct tune_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; - #define TUNE_ATTR_3(name, show, store) \ -static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store) +static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) #define TUNE_ATTR_2(name, store) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ From 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 15:06:25 +0100 Subject: [PATCH 07/25] GFS2: Remove a couple of unused sysfs entries These two tunables are pointless and would never need to be changed anyway. There is also a race between them and umount as the deamons which they refer to might have gone away. The easiest way to fix the race is to remove the interface. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d53b22edc980..894bf773ec93 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -530,15 +530,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ } \ TUNE_ATTR_2(name, name##_store) -#define TUNE_ATTR_DAEMON(name, process) \ -static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ -{ \ - ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \ - wake_up_process(sdp->sd_##process); \ - return r; \ -} \ -TUNE_ATTR_2(name, name##_store) - TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); TUNE_ATTR(quota_warn_period, 0); @@ -550,8 +541,6 @@ TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); -TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); -TUNE_ATTR_DAEMON(logd_secs, logd_process); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); static struct attribute *tune_attrs[] = { @@ -565,8 +554,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quota_simul_sync.attr, &tune_attr_stall_secs.attr, &tune_attr_statfs_quantum.attr, - &tune_attr_recoverd_secs.attr, - &tune_attr_logd_secs.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, NULL, From fe64d517df0970a68417184a12fcd4ba0589cc28 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 10:01:18 +0100 Subject: [PATCH 08/25] GFS2: Umount recovery race fix This patch fixes a race condition where we can receive recovery requests part way through processing a umount. This was causing problems since the recovery thread had already gone away. Looking in more detail at the recovery code, it was really trying to implement a slight variation on a work queue, and that happens to align nicely with the recently introduced slow-work subsystem. As a result I've updated the code to use slow-work, rather than its own home grown variety of work queue. When using the wait_on_bit() function, I noticed that the wait function that was supplied as an argument was appearing in the WCHAN field, so I've updated the function names in order to produce more meaningful output. Signed-off-by: Steven Whitehouse --- fs/gfs2/Kconfig | 1 + fs/gfs2/glock.c | 21 ++++++-- fs/gfs2/incore.h | 14 ++---- fs/gfs2/main.c | 8 +++ fs/gfs2/ops_fstype.c | 20 +++----- fs/gfs2/ops_super.c | 25 +++++++++- fs/gfs2/recovery.c | 114 +++++++++++++++---------------------------- fs/gfs2/recovery.h | 2 +- fs/gfs2/sys.c | 53 ++++++++++---------- 9 files changed, 128 insertions(+), 130 deletions(-) diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 3a981b7f64ca..cad957cdb1e5 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -7,6 +7,7 @@ config GFS2_FS select IP_SCTP if DLM_SCTP select FS_POSIX_ACL select CRC32 + select SLOW_WORK help A cluster filesystem. diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ff4981090489..2bf62bcc5181 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -796,22 +796,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -static int just_schedule(void *word) +/** + * gfs2_glock_holder_wait + * @word: unused + * + * This function and gfs2_glock_demote_wait both show up in the WCHAN + * field. Thus I've separated these otherwise identical functions in + * order to be more informative to the user. + */ + +static int gfs2_glock_holder_wait(void *word) { schedule(); return 0; } +static int gfs2_glock_demote_wait(void *word) +{ + schedule(); + return 0; +} + static void wait_on_holder(struct gfs2_holder *gh) { might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); } static void wait_on_demote(struct gfs2_glock *gl) { might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 65f438e9537a..0060e9564bb9 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -376,11 +377,11 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; - + struct slow_work jd_work; struct inode *jd_inode; + unsigned long jd_flags; +#define JDF_RECOVERY 1 unsigned int jd_jid; - int jd_dirty; - unsigned int jd_blocks; }; @@ -390,9 +391,6 @@ struct gfs2_statfs_change_host { s64 sc_dinodes; }; -#define GFS2_GLOCKD_DEFAULT 1 -#define GFS2_GLOCKD_MAX 16 - #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF #define GFS2_QUOTA_OFF 0 #define GFS2_QUOTA_ACCOUNT 1 @@ -427,7 +425,6 @@ struct gfs2_tune { unsigned int gt_incore_log_blocks; unsigned int gt_log_flush_secs; - unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ @@ -448,6 +445,7 @@ enum { SDF_JOURNAL_LIVE = 1, SDF_SHUTDOWN = 2, SDF_NOBARRIERS = 3, + SDF_NORECOVERY = 4, }; #define GFS2_FSNAME_LEN 256 @@ -494,7 +492,6 @@ struct lm_lockstruct { unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid; int ls_recover_jid_done; int ls_recover_jid_status; }; @@ -583,7 +580,6 @@ struct gfs2_sbd { /* Daemon stuff */ - struct task_struct *sd_recoverd_process; struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a6892ed0840a..eacd78a5d082 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -113,12 +114,18 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; + error = slow_work_register_user(); + if (error) + goto fail_slow; + gfs2_register_debugfs(); printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); return 0; +fail_slow: + unregister_filesystem(&gfs2meta_fs_type); fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: @@ -156,6 +163,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); + slow_work_unregister_user(); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7981fbc9fc3b..2cd1164c88d7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -55,7 +56,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; @@ -675,6 +675,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); + slow_work_init(&jd->jd_work, &gfs2_recover_ops); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) @@ -700,14 +701,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; struct gfs2_holder ji_gh; - struct task_struct *p; struct gfs2_inode *ip; int jindex = 1; int error = 0; if (undo) { jindex = 0; - goto fail_recoverd; + goto fail_jinode_gh; } sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); @@ -800,18 +800,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start recoverd thread: %d\n", error); - goto fail_jinode_gh; - } - sdp->sd_recoverd_process = p; - return 0; -fail_recoverd: - kthread_stop(sdp->sd_recoverd_process); fail_jinode_gh: if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); @@ -1172,8 +1162,10 @@ static int fill_super(struct super_block *sb, void *data, int silent) goto fail; } - if (sdp->sd_args.ar_spectator) + if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0677a8378560..a3c2272e7cad 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -121,6 +121,12 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) return error; } +static int gfs2_umount_recovery_wait(void *word) +{ + schedule(); + return 0; +} + /** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock @@ -131,6 +137,7 @@ static void gfs2_put_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; + struct gfs2_jdesc *jd; /* Unfreeze the filesystem, if we need to */ @@ -139,9 +146,25 @@ static void gfs2_put_super(struct super_block *sb) gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_lock); + /* No more recovery requests */ + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + smp_mb(); + + /* Wait on outstanding recovery */ +restart: + spin_lock(&sdp->sd_jindex_spin); + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) + continue; + spin_unlock(&sdp->sd_jindex_spin); + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, + gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + spin_unlock(&sdp->sd_jindex_spin); + kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); - kthread_stop(sdp->sd_recoverd_process); if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 247e8f7d6b3d..59d2695509d3 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -13,8 +13,7 @@ #include #include #include -#include -#include +#include #include "gfs2.h" #include "incore.h" @@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } -/** - * gfs2_recover_journal - recover a given journal - * @jd: the struct gfs2_jdesc describing the journal - * - * Acquire the journal's lock, check to see if the journal is clean, and - * do recovery if necessary. - * - * Returns: errno - */ - -int gfs2_recover_journal(struct gfs2_jdesc *jd) +static int gfs2_recover_get_ref(struct slow_work *work) { + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) + return -EBUSY; + return 0; +} + +static void gfs2_recover_put_ref(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + clear_bit(JDF_RECOVERY, &jd->jd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&jd->jd_flags, JDF_RECOVERY); +} + +static void gfs2_recover_work(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; @@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) gfs2_glock_dq_uninit(&j_gh); fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); - return 0; + return; fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); @@ -584,70 +590,28 @@ fail_gunlock_j: fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); - return error; } -static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) +struct slow_work_ops gfs2_recover_ops = { + .get_ref = gfs2_recover_get_ref, + .put_ref = gfs2_recover_put_ref, + .execute = gfs2_recover_work, +}; + + +static int gfs2_recovery_wait(void *word) { - struct gfs2_jdesc *jd; - int found = 0; - - spin_lock(&sdp->sd_jindex_spin); - - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_dirty) { - jd->jd_dirty = 0; - found = 1; - break; - } - } - spin_unlock(&sdp->sd_jindex_spin); - - if (!found) - jd = NULL; - - return jd; -} - -/** - * gfs2_check_journals - Recover any dirty journals - * @sdp: the filesystem - * - */ - -static void gfs2_check_journals(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd; - - for (;;) { - jd = gfs2_jdesc_find_dirty(sdp); - if (!jd) - break; - - if (jd != sdp->sd_jdesc) - gfs2_recover_journal(jd); - } -} - -/** - * gfs2_recoverd - Recover dead machine's journals - * @sdp: Pointer to GFS2 superblock - * - */ - -int gfs2_recoverd(void *data) -{ - struct gfs2_sbd *sdp = data; - unsigned long t; - - while (!kthread_should_stop()) { - gfs2_check_journals(sdp); - t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - + schedule(); + return 0; +} + +int gfs2_recover_journal(struct gfs2_jdesc *jd) +{ + int rv; + rv = slow_work_enqueue(&jd->jd_work); + if (rv) + return rv; + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); return 0; } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index a8218ea15b57..1616ac22569a 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -28,7 +28,7 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); -extern int gfs2_recoverd(void *data); +extern struct slow_work_ops gfs2_recover_ops; #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 894bf773ec93..9f6d48b75fd2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -356,34 +356,33 @@ static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_first_done); } -static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid); -} - -static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + unsigned jid; struct gfs2_jdesc *jd; + int rv; + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + + rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + goto out; + rv = -EBUSY; + if (sdp->sd_jdesc->jd_jid == jid) + goto out; + rv = -ENOENT; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { if (jd->jd_jid != jid) continue; - jd->jd_dirty = 1; + rv = slow_work_enqueue(&jd->jd_work); break; } +out: spin_unlock(&sdp->sd_jindex_spin); -} - -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_recover_jid = simple_strtol(buf, NULL, 0); - gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid); - if (sdp->sd_recoverd_process) - wake_up_process(sdp->sd_recoverd_process); - return len; + return rv ? rv : len; } static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) @@ -401,15 +400,15 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) -GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); -GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(id, 0444, lkid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); -GDLM_ATTR(first_done, 0444, first_done_show, NULL); -GDLM_ATTR(recover, 0644, recover_show, recover_store); -GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); -GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); +GDLM_ATTR(block, 0644, block_show, block_store); +GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); +GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(first_done, 0444, first_done_show, NULL); +GDLM_ATTR(recover, 0200, NULL, recover_store); +GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); +GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); static struct attribute *lock_module_attrs[] = { &gdlm_attr_proto_name.attr, From e9ccb73ab57ada469602506496c42e5b4468ac3e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 10:23:23 +0100 Subject: [PATCH 09/25] GFS2: Update docs Update a few things which were out of date, and fix a typo. Signed-off-by: Steven Whitehouse --- Documentation/filesystems/gfs2-glocks.txt | 2 +- Documentation/filesystems/gfs2.txt | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt index 4dae9a3840bf..0494f78d87e4 100644 --- a/Documentation/filesystems/gfs2-glocks.txt +++ b/Documentation/filesystems/gfs2-glocks.txt @@ -60,7 +60,7 @@ go_lock | Called for the first local holder of a lock go_unlock | Called on the final local unlock of a lock go_dump | Called to print content of object for debugfs file, or on | error to dump glock to the log. -go_type; | The type of the glock, LM_TYPE_..... +go_type | The type of the glock, LM_TYPE_..... go_min_hold_time | The minimum hold time The minimum hold time for each lock is the time after a remote lock diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt index 593004b6bbab..5e3ab8f3beff 100644 --- a/Documentation/filesystems/gfs2.txt +++ b/Documentation/filesystems/gfs2.txt @@ -11,18 +11,15 @@ their I/O so file system consistency is maintained. One of the nifty features of GFS is perfect consistency -- changes made to the file system on one machine show up immediately on all other machines in the cluster. -GFS uses interchangable inter-node locking mechanisms. Different lock -modules can plug into GFS and each file system selects the appropriate -lock module at mount time. Lock modules include: +GFS uses interchangable inter-node locking mechanisms, the currently +supported mechanisms are: lock_nolock -- allows gfs to be used as a local file system lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking The dlm is found at linux/fs/dlm/ -In addition to interfacing with an external locking manager, a gfs lock -module is responsible for interacting with external cluster management -systems. Lock_dlm depends on user space cluster management systems found +Lock_dlm depends on user space cluster management systems found at the URL above. To use gfs as a local file system, no external clustering systems are @@ -31,13 +28,19 @@ needed, simply: $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device $ mount -t gfs2 /dev/block_device /dir -GFS2 is not on-disk compatible with previous versions of GFS. +If you are using Fedora, you need to install the gfs2-utils package +and, for lock_dlm, you will also need to install the cman package +and write a cluster.conf as per the documentation. + +GFS2 is not on-disk compatible with previous versions of GFS, but it +is pretty close. The following man pages can be found at the URL above: - gfs2_fsck to repair a filesystem + fsck.gfs2 to repair a filesystem gfs2_grow to expand a filesystem online gfs2_jadd to add journals to a filesystem online gfs2_tool to manipulate, examine and tune a filesystem gfs2_quota to examine and change quota values in a filesystem + gfs2_convert to convert a gfs filesystem to gfs2 in-place mount.gfs2 to help mount(8) mount a filesystem mkfs.gfs2 to make a filesystem From ef9e8b14a5c1d0afbaf12b4c3b271188ddfc52a4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 14:25:16 +0100 Subject: [PATCH 10/25] GFS2: Don't warn when delete inode fails on ro filesystem If the filesystem is read-only, then we expect that delete inode will fail, so there is no need to warn about it. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index a3c2272e7cad..2fd1dcbcc5b7 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -714,7 +714,7 @@ out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); - if (error && error != GLR_TRYFAILED) + if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_delete_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); From 09010978345e8883003bf411bb99753710eb5a3a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 20 May 2009 10:48:47 +0100 Subject: [PATCH 11/25] GFS2: Improve resource group error handling This patch improves the error handling in the case where we discover that the summary information in the resource group doesn't match the bitmap information while in the process of allocating blocks. Originally this resulted in a kernel bug, but this patch changes that so that we return -EIO and print some messages explaining what went wrong, and how to fix it. We also remember locally not to try and allocate from the same rgrp again, so that a subsequent allocation in a different rgrp should succeed. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 9 ++++++-- fs/gfs2/dir.c | 11 +++++++-- fs/gfs2/eattr.c | 14 ++++++++--- fs/gfs2/glops.c | 20 +--------------- fs/gfs2/incore.h | 7 +++--- fs/gfs2/rgrp.c | 60 +++++++++++++++++++++++++++++++++++------------- fs/gfs2/rgrp.h | 47 ++++++++++++++++++------------------- 7 files changed, 100 insertions(+), 68 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3a5d3f883e10..253e1a39f841 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -136,7 +136,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) and write it out to disk */ unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + goto out_brelse; if (isdir) { gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); error = gfs2_dir_get_new_buffer(ip, block, &bh); @@ -476,8 +478,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, blks = dblks + iblks; i = sheight; do { + int error; n = blks - alloced; - bn = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return error; alloced += n; if (state != ALLOC_DATA || gfs2_is_jdata(ip)) gfs2_trans_add_unrevoke(sdp, bn, n); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index aef4d0c06748..297d7e5cebad 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -803,13 +803,20 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, { struct gfs2_inode *ip = GFS2_I(inode); unsigned int n = 1; - u64 bn = gfs2_alloc_block(ip, &n); - struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); + u64 bn; + int error; + struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct qstr name = { .name = "", .len = 0, .hash = 0 }; + + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return NULL; + bh = gfs2_meta_new(ip->i_gl, bn); if (!bh) return NULL; + gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 899763aed217..07ea9529adda 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -582,8 +582,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) struct gfs2_ea_header *ea; unsigned int n = 1; u64 block; + int error; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); *bhp = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, *bhp, 1); @@ -617,6 +620,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, struct gfs2_ea_request *er) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + int error; ea->ea_data_len = cpu_to_be32(er->er_data_len); ea->ea_name_len = er->er_name_len; @@ -642,7 +646,9 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); bh = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, bh, 1); @@ -963,7 +969,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - blk = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &blk, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, blk, 1); indbh = gfs2_meta_new(ip->i_gl, blk); gfs2_trans_add_bh(ip->i_gl, indbh, 1); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 70f87f43afa2..d5e4ab155ca0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -309,24 +309,6 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) gfs2_rgrp_bh_put(gh->gh_gl->gl_object); } -/** - * rgrp_go_dump - print out an rgrp - * @seq: The iterator - * @gl: The glock in question - * - */ - -static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) -{ - const struct gfs2_rgrpd *rgd = gl->gl_object; - if (rgd == NULL) - return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", - (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); - return 0; -} - /** * trans_go_sync - promote/demote the transaction glock * @gl: the glock @@ -410,7 +392,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, - .go_dump = rgrp_go_dump, + .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_min_hold_time = HZ / 5, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0060e9564bb9..de50d86fec12 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -92,9 +92,10 @@ struct gfs2_rgrpd { unsigned int rd_bh_count; u32 rd_last_alloc; unsigned char rd_flags; -#define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ -#define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ -#define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ +#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ +#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ +#define GFS2_RDF_ERROR 0x40000000 /* error in rg */ +#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ }; enum gfs2_state_bits { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa2..fbacf09ee34e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -701,10 +701,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) u32 rg_flags; rg_flags = be32_to_cpu(str->rg_flags); - if (rg_flags & GFS2_RGF_NOALLOC) - rgd->rd_flags |= GFS2_RDF_NOALLOC; - else - rgd->rd_flags &= ~GFS2_RDF_NOALLOC; + rg_flags &= ~GFS2_RDF_MASK; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -713,11 +710,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrp *str = buf; - u32 rg_flags = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) - rg_flags |= GFS2_RGF_NOALLOC; - str->rg_flags = cpu_to_be32(rg_flags); + str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); str->rg_free = cpu_to_be32(rgd->rd_free); str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); str->__pad = cpu_to_be32(0); @@ -942,7 +936,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) + if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; spin_lock(&sdp->sd_rindex_spin); @@ -1435,13 +1429,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } /** - * gfs2_alloc_block - Allocate a block - * @ip: the inode to allocate the block for + * gfs2_rgrp_dump - print out an rgrp + * @seq: The iterator + * @gl: The glock in question * - * Returns: the allocated block */ -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) +int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_rgrpd *rgd = gl->gl_object; + if (rgd == NULL) + return 0; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + (unsigned long long)rgd->rd_addr, rgd->rd_flags, + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + return 0; +} + +/** + * gfs2_alloc_block - Allocate one or more blocks + * @ip: the inode to allocate the block for + * @bn: Used to return the starting block number + * @n: requested number of blocks/extent length (value/result) + * + * Returns: 0 or error + */ + +int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -1457,7 +1471,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) goal = rgd->rd_last_alloc; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); - BUG_ON(blk == BFITNOENT); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) + goto rgrp_error; rgd->rd_last_alloc = blk; block = rgd->rd_data0 + blk; @@ -1469,7 +1486,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); brelse(dibh); } - gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); + if (rgd->rd_free < *n) + goto rgrp_error; + rgd->rd_free -= *n; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1484,7 +1503,16 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - return block; + *bn = block; + return 0; + +rgrp_error: + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", + (unsigned long long)rgd->rd_addr); + fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); + gfs2_rgrp_dump(NULL, rgd->rd_gl); + rgd->rd_flags |= GFS2_RDF_ERROR; + return -EIO; } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3181c7e624bf..1e76ff0f3e00 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -14,22 +14,22 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); +extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); +extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); -struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { BUG_ON(ip->i_alloc == NULL); @@ -37,22 +37,22 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) ip->i_alloc = NULL; } -int gfs2_inplace_reserve_i(struct gfs2_inode *ip, - char *file, unsigned int line); +extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, + unsigned int line); #define gfs2_inplace_reserve(ip) \ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) -void gfs2_inplace_release(struct gfs2_inode *ip); +extern void gfs2_inplace_release(struct gfs2_inode *ip); -unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); +extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); -u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); +extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); +extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -void gfs2_unlink_di(struct inode *inode); +extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +extern void gfs2_unlink_di(struct inode *inode); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -61,10 +61,11 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, - u64 block); -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); -void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, + u64 block); +extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); +extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); #endif /* __RGRP_DOT_H__ */ From 60a0b8f93664621a07b93273fc8ebc29590c62f5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 12:23:12 +0100 Subject: [PATCH 12/25] GFS2: Add a rgrp bitmap full flag During block allocation, it is useful to know if sections of disk are full on a finer grained basis than a single resource group. This can make a performance difference when resource groups have larger numbers of bitmap blocks, since we no longer have to search them all block by block in each individual bitmap. The full flag is set on a per-bitmap basis when it has been searched and found to have no free space. It is then skipped in subsequent searches until the flag is reset. The resetting occurs if we have to drop the glock on the resource group for any reason, or if we deallocate some blocks within that resource group and thus free up some space. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 3 ++ fs/gfs2/rgrp.c | 79 +++++++++++++++++++++++++++++------------------- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de50d86fec12..dd87379b61e6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -64,9 +64,12 @@ struct gfs2_log_element { const struct gfs2_log_operations *le_ops; }; +#define GBF_FULL 1 + struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; + unsigned long bi_flags; u32 bi_offset; u32 bi_start; u32 bi_len; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fbacf09ee34e..23637b9d1c73 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; + bi->bi_flags = 0; /* small rgrp; bitmap stored completely in header block */ if (length == 1) { bytes = bytes_left; @@ -769,6 +770,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { + for (x = 0; x < length; x++) + clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= GFS2_RDF_UPTODATE; } @@ -897,6 +900,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) continue; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); + clear_bit(GBF_FULL, &bi->bi_flags); memcpy(bi->bi_clone + bi->bi_offset, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } @@ -1309,30 +1313,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; - u32 blk = 0; + u32 blk = BFITNOENT; unsigned int buf, x; const unsigned int elen = *n; - const u8 *buffer; + const u8 *buffer = NULL; *n = 0; /* Find bitmap block that contains bits for goal block */ for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + /* Convert scope of "goal" from rgrp-wide to within found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } } + buf = 0; + goal = 0; - gfs2_assert(rgd->rd_sbd, buf < length); - - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - goal -= bi->bi_start * GFS2_NBBY; - +do_search: /* Search (up to entire) bitmap in this rgrp for allocatable block. "x <= length", instead of "x < length", because we typically start the search in the middle of a bit block, but if we can't find an allocatable block anywhere else, we want to be able wrap around and search in the first part of our first-searched bit block. */ for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags) && + (old_state == GFS2_BLKST_FREE)) + goto skip; + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1343,33 +1354,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, if (blk != BFITNOENT) break; + if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) + set_bit(GBF_FULL, &bi->bi_flags); + /* Try next bitmap block (wrap back to rgrp header if at end) */ - buf = (buf + 1) % length; - bi = rgd->rd_bits + buf; +skip: + buf++; + buf %= length; goal = 0; } - if (blk != BFITNOENT && old_state != new_state) { - *n = 1; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, blk, new_state); - goal = blk; - while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, - bi->bi_offset, bi->bi_len, goal, - new_state); - (*n)++; - } - } + if (blk == BFITNOENT) + return blk; + *n = 1; + if (old_state == new_state) + goto out; - return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, + bi->bi_len, blk, new_state); + goal = blk; + while (*n < elen) { + goal++; + if (goal >= (bi->bi_len * GFS2_NBBY)) + break; + if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != + GFS2_BLKST_FREE) + break; + gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, + bi->bi_len, goal, new_state); + (*n)++; + } +out: + return (bi->bi_start * GFS2_NBBY) + blk; } /** From 1ce97e564b628bee30b8dbb64e5e653a484308f6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 15:18:19 +0100 Subject: [PATCH 13/25] GFS2: Be more aggressive in reclaiming unlinked inodes This patch increases the frequency with which gfs2 looks for unlinked, but still allocated inodes. Its the equivalent operation to ext3's orphan list, but done with bitmaps in the resource groups. This also fixes a bug where a field in the rgrp was too small. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/rgrp.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index dd87379b61e6..225347fbff3c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -94,7 +94,7 @@ struct gfs2_rgrpd { struct gfs2_sbd *rd_sbd; unsigned int rd_bh_count; u32 rd_last_alloc; - unsigned char rd_flags; + u32 rd_flags; #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 23637b9d1c73..ee3d5c1876a3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -581,7 +581,6 @@ static int read_rindex_entry(struct gfs2_inode *ip, rgd->rd_gl->gl_object = rgd; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; - rgd->rd_flags |= GFS2_RDF_CHECK; return error; } @@ -703,6 +702,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) rg_flags = be32_to_cpu(str->rg_flags); rg_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= rg_flags; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -773,7 +774,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgd->rd_flags |= GFS2_RDF_UPTODATE; + rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); } spin_lock(&sdp->sd_rindex_spin); From b1e71b0622974953e46a284aa986504a90869a9b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:01:55 +0100 Subject: [PATCH 14/25] GFS2: Clean up some file names This patch renames the ops_*.c files which have no counterpart without the ops_ prefix in order to shorten the name and make it more readable. In addition, ops_address.h (which was very small) is moved into inode.h and inode.h is cleaned up by adding extern where required. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/{ops_address.c => aops.c} | 1 - fs/gfs2/bmap.c | 1 - fs/gfs2/{ops_dentry.c => dentry.c} | 0 fs/gfs2/{ops_export.c => export.c} | 0 fs/gfs2/{ops_file.c => file.c} | 1 - fs/gfs2/inode.c | 1 - fs/gfs2/inode.h | 53 +++++++++++++++++------------- fs/gfs2/meta_io.c | 1 - fs/gfs2/ops_address.h | 23 ------------- fs/gfs2/quota.c | 1 - fs/gfs2/rgrp.c | 1 - 12 files changed, 32 insertions(+), 53 deletions(-) rename fs/gfs2/{ops_address.c => aops.c} (99%) rename fs/gfs2/{ops_dentry.c => dentry.c} (100%) rename fs/gfs2/{ops_export.c => export.c} (100%) rename fs/gfs2/{ops_file.c => file.c} (99%) delete mode 100644 fs/gfs2/ops_address.h diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index a851ea4bdf70..4f7332c7682f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ - mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ + mount.o aops.o dentry.o export.o file.o \ ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/aops.c similarity index 99% rename from fs/gfs2/ops_address.c rename to fs/gfs2/aops.c index e5664210f0d8..03ebb439ace0 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/aops.c @@ -28,7 +28,6 @@ #include "inode.h" #include "log.h" #include "meta_io.h" -#include "ops_address.h" #include "quota.h" #include "trans.h" #include "rgrp.h" diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 253e1a39f841..1153a078920c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -25,7 +25,6 @@ #include "trans.h" #include "dir.h" #include "util.h" -#include "ops_address.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/dentry.c similarity index 100% rename from fs/gfs2/ops_dentry.c rename to fs/gfs2/dentry.c diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/export.c similarity index 100% rename from fs/gfs2/ops_export.c rename to fs/gfs2/export.c diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/file.c similarity index 99% rename from fs/gfs2/ops_file.c rename to fs/gfs2/file.c index 0ee7bd287c5a..73b6f552f06d 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/file.c @@ -39,7 +39,6 @@ #include "trans.h" #include "util.h" #include "eaops.h" -#include "ops_address.h" /** * gfs2_llseek - seek to a location in a file diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5a31d426116f..c03a1a384e72 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -30,7 +30,6 @@ #include "inode.h" #include "log.h" #include "meta_io.h" -#include "ops_address.h" #include "quota.h" #include "rgrp.h" #include "trans.h" diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c30be2b66580..2c3ec072d60e 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -11,8 +11,16 @@ #define __INODE_DOT_H__ #include +#include +#include #include "util.h" +extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); +extern int gfs2_internal_read(struct gfs2_inode *ip, + struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size); +extern void gfs2_set_aops(struct inode *inode); + static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { return !ip->i_height; @@ -73,30 +81,31 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, } -void gfs2_set_iop(struct inode *inode); -struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - int skip_freeing); -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); +extern void gfs2_set_iop(struct inode *inode); +extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino, + int skip_freeing); +extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); -int gfs2_inode_refresh(struct gfs2_inode *ip); +extern int gfs2_inode_refresh(struct gfs2_inode *ip); -int gfs2_dinode_dealloc(struct gfs2_inode *inode); -int gfs2_change_nlink(struct gfs2_inode *ip, int diff); -struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root); -struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode, dev_t dev); -int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip); -int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip); -int gfs2_permission(struct inode *inode, int mask); -int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); -int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); -struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); -void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -void gfs2_dinode_print(const struct gfs2_inode *ip); +extern int gfs2_dinode_dealloc(struct gfs2_inode *inode); +extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); +extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root); +extern struct inode *gfs2_createi(struct gfs2_holder *ghs, + const struct qstr *name, + unsigned int mode, dev_t dev); +extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, + struct gfs2_inode *ip); +extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, + const struct gfs2_inode *ip); +extern int gfs2_permission(struct inode *inode, int mask); +extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); +extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); +extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); +extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +extern void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 78a5f4312667..cb8d7a93d5ec 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -31,7 +31,6 @@ #include "rgrp.h" #include "trans.h" #include "util.h" -#include "ops_address.h" static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h deleted file mode 100644 index 5da21285bba4..000000000000 --- a/fs/gfs2/ops_address.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_ADDRESS_DOT_H__ -#define __OPS_ADDRESS_DOT_H__ - -#include -#include -#include - -extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); -extern int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_aops(struct inode *inode); - -#endif /* __OPS_ADDRESS_DOT_H__ */ diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 152e6c4a0dca..2e9b9326bfc9 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -60,7 +60,6 @@ #include "super.h" #include "trans.h" #include "inode.h" -#include "ops_address.h" #include "util.h" #define QUOTA_USER 1 diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ee3d5c1876a3..6122c7ee3648 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,7 +29,6 @@ #include "util.h" #include "log.h" #include "inode.h" -#include "ops_address.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) From 9e6e0a128bca0a151d8d3fbd9459b22fc21cfebb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:36:01 +0100 Subject: [PATCH 15/25] GFS2: Merge mount.c and ops_super.c into super.c mount.c only contained a single function, so is not really worth retaining on its own. All of the super related code is now either in super.c or ops_fstype.c Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 4 +- fs/gfs2/mount.c | 195 ---------- fs/gfs2/ops_super.c | 757 ------------------------------------- fs/gfs2/super.c | 903 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 903 insertions(+), 956 deletions(-) delete mode 100644 fs/gfs2/mount.c delete mode 100644 fs/gfs2/ops_super.c diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 4f7332c7682f..d53a9bea1c2f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o log.o lops.o main.o meta_io.o \ - mount.o aops.o dentry.o export.o file.o \ - ops_fstype.o ops_inode.o ops_super.o quota.o \ + aops.o dentry.o export.o file.o \ + ops_fstype.o ops_inode.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c deleted file mode 100644 index 947af151fa24..000000000000 --- a/fs/gfs2/mount.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "super.h" -#include "sys.h" -#include "util.h" - -enum { - Opt_lockproto, - Opt_locktable, - Opt_hostdata, - Opt_spectator, - Opt_ignore_local_fs, - Opt_localflocks, - Opt_localcaching, - Opt_debug, - Opt_nodebug, - Opt_upgrade, - Opt_acl, - Opt_noacl, - Opt_quota_off, - Opt_quota_account, - Opt_quota_on, - Opt_quota, - Opt_noquota, - Opt_suiddir, - Opt_nosuiddir, - Opt_data_writeback, - Opt_data_ordered, - Opt_meta, - Opt_discard, - Opt_nodiscard, - Opt_commit, - Opt_err, -}; - -static const match_table_t tokens = { - {Opt_lockproto, "lockproto=%s"}, - {Opt_locktable, "locktable=%s"}, - {Opt_hostdata, "hostdata=%s"}, - {Opt_spectator, "spectator"}, - {Opt_ignore_local_fs, "ignore_local_fs"}, - {Opt_localflocks, "localflocks"}, - {Opt_localcaching, "localcaching"}, - {Opt_debug, "debug"}, - {Opt_nodebug, "nodebug"}, - {Opt_upgrade, "upgrade"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_quota_off, "quota=off"}, - {Opt_quota_account, "quota=account"}, - {Opt_quota_on, "quota=on"}, - {Opt_quota, "quota"}, - {Opt_noquota, "noquota"}, - {Opt_suiddir, "suiddir"}, - {Opt_nosuiddir, "nosuiddir"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_meta, "meta"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_commit, "commit=%d"}, - {Opt_err, NULL} -}; - -/** - * gfs2_mount_args - Parse mount options - * @sdp: - * @data: - * - * Return: errno - */ - -int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) -{ - char *o; - int token; - substring_t tmp[MAX_OPT_ARGS]; - int rv; - - /* Split the options into tokens with the "," character and - process them */ - - while (1) { - o = strsep(&options, ","); - if (o == NULL) - break; - if (*o == '\0') - continue; - - token = match_token(o, tokens, tmp); - switch (token) { - case Opt_lockproto: - match_strlcpy(args->ar_lockproto, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_locktable: - match_strlcpy(args->ar_locktable, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_hostdata: - match_strlcpy(args->ar_hostdata, &tmp[0], - GFS2_LOCKNAME_LEN); - break; - case Opt_spectator: - args->ar_spectator = 1; - break; - case Opt_ignore_local_fs: - args->ar_ignore_local_fs = 1; - break; - case Opt_localflocks: - args->ar_localflocks = 1; - break; - case Opt_localcaching: - args->ar_localcaching = 1; - break; - case Opt_debug: - args->ar_debug = 1; - break; - case Opt_nodebug: - args->ar_debug = 0; - break; - case Opt_upgrade: - args->ar_upgrade = 1; - break; - case Opt_acl: - args->ar_posix_acl = 1; - break; - case Opt_noacl: - args->ar_posix_acl = 0; - break; - case Opt_quota_off: - case Opt_noquota: - args->ar_quota = GFS2_QUOTA_OFF; - break; - case Opt_quota_account: - args->ar_quota = GFS2_QUOTA_ACCOUNT; - break; - case Opt_quota_on: - case Opt_quota: - args->ar_quota = GFS2_QUOTA_ON; - break; - case Opt_suiddir: - args->ar_suiddir = 1; - break; - case Opt_nosuiddir: - args->ar_suiddir = 0; - break; - case Opt_data_writeback: - args->ar_data = GFS2_DATA_WRITEBACK; - break; - case Opt_data_ordered: - args->ar_data = GFS2_DATA_ORDERED; - break; - case Opt_meta: - args->ar_meta = 1; - break; - case Opt_discard: - args->ar_discard = 1; - break; - case Opt_nodiscard: - args->ar_discard = 0; - break; - case Opt_commit: - rv = match_int(&tmp[0], &args->ar_commit); - if (rv || args->ar_commit <= 0) { - fs_info(sdp, "commit mount option requires a positive numeric argument\n"); - return rv ? rv : -EINVAL; - } - break; - case Opt_err: - default: - fs_info(sdp, "invalid mount option: %s\n", o); - return -EINVAL; - } - } - - return 0; -} - diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c deleted file mode 100644 index 2fd1dcbcc5b7..000000000000 --- a/fs/gfs2/ops_super.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "glock.h" -#include "inode.h" -#include "log.h" -#include "quota.h" -#include "recovery.h" -#include "rgrp.h" -#include "super.h" -#include "sys.h" -#include "util.h" -#include "trans.h" -#include "dir.h" -#include "eattr.h" -#include "bmap.h" -#include "meta_io.h" - -#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) - -/** - * gfs2_write_inode - Make sure the inode is stable on the disk - * @inode: The inode - * @sync: synchronous write flag - * - * Returns: errno - */ - -static int gfs2_write_inode(struct inode *inode, int sync) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_holder gh; - struct buffer_head *bh; - struct timespec atime; - struct gfs2_dinode *di; - int ret = 0; - - /* Check this is a "normal" inode, etc */ - if (!test_bit(GIF_USER, &ip->i_flags) || - (current->flags & PF_MEMALLOC)) - return 0; - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (ret) - goto do_flush; - ret = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (ret) - goto do_unlock; - ret = gfs2_meta_inode_buffer(ip, &bh); - if (ret == 0) { - di = (struct gfs2_dinode *)bh->b_data; - atime.tv_sec = be64_to_cpu(di->di_atime); - atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); - if (timespec_compare(&inode->i_atime, &atime) > 0) { - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_dinode_out(ip, bh->b_data); - } - brelse(bh); - } - gfs2_trans_end(sdp); -do_unlock: - gfs2_glock_dq_uninit(&gh); -do_flush: - if (sync != 0) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); - return ret; -} - -/** - * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one - * @sdp: the filesystem - * - * Returns: errno - */ - -static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) -{ - struct gfs2_holder t_gh; - int error; - - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return error; - - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); - - gfs2_quota_cleanup(sdp); - - return error; -} - -static int gfs2_umount_recovery_wait(void *word) -{ - schedule(); - return 0; -} - -/** - * gfs2_put_super - Unmount the filesystem - * @sb: The VFS superblock - * - */ - -static void gfs2_put_super(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - int error; - struct gfs2_jdesc *jd; - - /* Unfreeze the filesystem, if we need to */ - - mutex_lock(&sdp->sd_freeze_lock); - if (sdp->sd_freeze_count) - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - mutex_unlock(&sdp->sd_freeze_lock); - - /* No more recovery requests */ - set_bit(SDF_NORECOVERY, &sdp->sd_flags); - smp_mb(); - - /* Wait on outstanding recovery */ -restart: - spin_lock(&sdp->sd_jindex_spin); - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) - continue; - spin_unlock(&sdp->sd_jindex_spin); - wait_on_bit(&jd->jd_flags, JDF_RECOVERY, - gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); - goto restart; - } - spin_unlock(&sdp->sd_jindex_spin); - - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); - - if (!(sb->s_flags & MS_RDONLY)) { - error = gfs2_make_fs_ro(sdp); - if (error) - gfs2_io_error(sdp); - } - /* At this point, we're through modifying the disk */ - - /* Release stuff */ - - iput(sdp->sd_jindex); - iput(sdp->sd_inum_inode); - iput(sdp->sd_statfs_inode); - iput(sdp->sd_rindex); - iput(sdp->sd_quota_inode); - - gfs2_glock_put(sdp->sd_rename_gl); - gfs2_glock_put(sdp->sd_trans_gl); - - if (!sdp->sd_args.ar_spectator) { - gfs2_glock_dq_uninit(&sdp->sd_journal_gh); - gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); - gfs2_glock_dq_uninit(&sdp->sd_ir_gh); - gfs2_glock_dq_uninit(&sdp->sd_sc_gh); - gfs2_glock_dq_uninit(&sdp->sd_qc_gh); - iput(sdp->sd_ir_inode); - iput(sdp->sd_sc_inode); - iput(sdp->sd_qc_inode); - } - - gfs2_glock_dq_uninit(&sdp->sd_live_gh); - gfs2_clear_rgrpd(sdp); - gfs2_jindex_free(sdp); - /* Take apart glock structures and buffer lists */ - gfs2_gl_hash_clear(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - - /* At this point, we're through participating in the lockspace */ - gfs2_sys_fs_del(sdp); -} - -/** - * gfs2_write_super - * @sb: the superblock - * - */ - -static void gfs2_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - -/** - * gfs2_sync_fs - sync the filesystem - * @sb: the superblock - * - * Flushes the log to disk. - */ - -static int gfs2_sync_fs(struct super_block *sb, int wait) -{ - sb->s_dirt = 0; - if (wait && sb->s_fs_info) - gfs2_log_flush(sb->s_fs_info, NULL); - return 0; -} - -/** - * gfs2_freeze - prevent further writes to the filesystem - * @sb: the VFS structure for the filesystem - * - */ - -static int gfs2_freeze(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - int error; - - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return -EINVAL; - - for (;;) { - error = gfs2_freeze_fs(sdp); - if (!error) - break; - - switch (error) { - case -EBUSY: - fs_err(sdp, "waiting for recovery before freeze\n"); - break; - - default: - fs_err(sdp, "error freezing FS: %d\n", error); - break; - } - - fs_err(sdp, "retrying...\n"); - msleep(1000); - } - return 0; -} - -/** - * gfs2_unfreeze - reallow writes to the filesystem - * @sb: the VFS structure for the filesystem - * - */ - -static int gfs2_unfreeze(struct super_block *sb) -{ - gfs2_unfreeze_fs(sb->s_fs_info); - return 0; -} - -/** - * statfs_fill - fill in the sg for a given RG - * @rgd: the RG - * @sc: the sc structure - * - * Returns: 0 on success, -ESTALE if the LVB is invalid - */ - -static int statfs_slow_fill(struct gfs2_rgrpd *rgd, - struct gfs2_statfs_change_host *sc) -{ - gfs2_rgrp_verify(rgd); - sc->sc_total += rgd->rd_data; - sc->sc_free += rgd->rd_free; - sc->sc_dinodes += rgd->rd_dinodes; - return 0; -} - -/** - * gfs2_statfs_slow - Stat a filesystem using asynchronous locking - * @sdp: the filesystem - * @sc: the sc info that will be returned - * - * Any error (other than a signal) will cause this routine to fall back - * to the synchronous version. - * - * FIXME: This really shouldn't busy wait like this. - * - * Returns: errno - */ - -static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) -{ - struct gfs2_holder ri_gh; - struct gfs2_rgrpd *rgd_next; - struct gfs2_holder *gha, *gh; - unsigned int slots = 64; - unsigned int x; - int done; - int error = 0, err; - - memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); - gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); - if (!gha) - return -ENOMEM; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto out; - - rgd_next = gfs2_rgrpd_get_first(sdp); - - for (;;) { - done = 1; - - for (x = 0; x < slots; x++) { - gh = gha + x; - - if (gh->gh_gl && gfs2_glock_poll(gh)) { - err = gfs2_glock_wait(gh); - if (err) { - gfs2_holder_uninit(gh); - error = err; - } else { - if (!error) - error = statfs_slow_fill( - gh->gh_gl->gl_object, sc); - gfs2_glock_dq_uninit(gh); - } - } - - if (gh->gh_gl) - done = 0; - else if (rgd_next && !error) { - error = gfs2_glock_nq_init(rgd_next->rd_gl, - LM_ST_SHARED, - GL_ASYNC, - gh); - rgd_next = gfs2_rgrpd_get_next(rgd_next); - done = 0; - } - - if (signal_pending(current)) - error = -ERESTARTSYS; - } - - if (done) - break; - - yield(); - } - - gfs2_glock_dq_uninit(&ri_gh); - -out: - kfree(gha); - return error; -} - -/** - * gfs2_statfs_i - Do a statfs - * @sdp: the filesystem - * @sg: the sg structure - * - * Returns: errno - */ - -static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) -{ - struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; - - spin_lock(&sdp->sd_statfs_spin); - - *sc = *m_sc; - sc->sc_total += l_sc->sc_total; - sc->sc_free += l_sc->sc_free; - sc->sc_dinodes += l_sc->sc_dinodes; - - spin_unlock(&sdp->sd_statfs_spin); - - if (sc->sc_free < 0) - sc->sc_free = 0; - if (sc->sc_free > sc->sc_total) - sc->sc_free = sc->sc_total; - if (sc->sc_dinodes < 0) - sc->sc_dinodes = 0; - - return 0; -} - -/** - * gfs2_statfs - Gather and return stats about the filesystem - * @sb: The superblock - * @statfsbuf: The buffer - * - * Returns: 0 on success or error code - */ - -static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct super_block *sb = dentry->d_inode->i_sb; - struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_statfs_change_host sc; - int error; - - if (gfs2_tune_get(sdp, gt_statfs_slow)) - error = gfs2_statfs_slow(sdp, &sc); - else - error = gfs2_statfs_i(sdp, &sc); - - if (error) - return error; - - buf->f_type = GFS2_MAGIC; - buf->f_bsize = sdp->sd_sb.sb_bsize; - buf->f_blocks = sc.sc_total; - buf->f_bfree = sc.sc_free; - buf->f_bavail = sc.sc_free; - buf->f_files = sc.sc_dinodes + sc.sc_free; - buf->f_ffree = sc.sc_free; - buf->f_namelen = GFS2_FNAMESIZE; - - return 0; -} - -/** - * gfs2_remount_fs - called when the FS is remounted - * @sb: the filesystem - * @flags: the remount flags - * @data: extra data passed in (not used right now) - * - * Returns: errno - */ - -static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_args args = sdp->sd_args; /* Default to current settings */ - struct gfs2_tune *gt = &sdp->sd_tune; - int error; - - spin_lock(>->gt_spin); - args.ar_commit = gt->gt_log_flush_secs; - spin_unlock(>->gt_spin); - error = gfs2_mount_args(sdp, &args, data); - if (error) - return error; - - /* Not allowed to change locking details */ - if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || - strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || - strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) - return -EINVAL; - - /* Some flags must not be changed */ - if (args_neq(&args, &sdp->sd_args, spectator) || - args_neq(&args, &sdp->sd_args, ignore_local_fs) || - args_neq(&args, &sdp->sd_args, localflocks) || - args_neq(&args, &sdp->sd_args, localcaching) || - args_neq(&args, &sdp->sd_args, meta)) - return -EINVAL; - - if (sdp->sd_args.ar_spectator) - *flags |= MS_RDONLY; - - if ((sb->s_flags ^ *flags) & MS_RDONLY) { - if (*flags & MS_RDONLY) - error = gfs2_make_fs_ro(sdp); - else - error = gfs2_make_fs_rw(sdp); - if (error) - return error; - } - - sdp->sd_args = args; - if (sdp->sd_args.ar_posix_acl) - sb->s_flags |= MS_POSIXACL; - else - sb->s_flags &= ~MS_POSIXACL; - spin_lock(>->gt_spin); - gt->gt_log_flush_secs = args.ar_commit; - spin_unlock(>->gt_spin); - - return 0; -} - -/** - * gfs2_drop_inode - Drop an inode (test for remote unlink) - * @inode: The inode to drop - * - * If we've received a callback on an iopen lock then its because a - * remote node tried to deallocate the inode but failed due to this node - * still having the inode open. Here we mark the link count zero - * since we know that it must have reached zero if the GLF_DEMOTE flag - * is set on the iopen glock. If we didn't do a disk read since the - * remote node removed the final link then we might otherwise miss - * this event. This check ensures that this node will deallocate the - * inode's blocks, or alternatively pass the baton on to another - * node for later deallocation. - */ - -static void gfs2_drop_inode(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { - struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) - clear_nlink(inode); - } - generic_drop_inode(inode); -} - -/** - * gfs2_clear_inode - Deallocate an inode when VFS is done with it - * @inode: The VFS inode - * - */ - -static void gfs2_clear_inode(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - - /* This tells us its a "real" inode and not one which only - * serves to contain an address space (see rgrp.c, meta_io.c) - * which therefore doesn't have its own glocks. - */ - if (test_bit(GIF_USER, &ip->i_flags)) { - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); - ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { - ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - } - } -} - -static int is_ancestor(const struct dentry *d1, const struct dentry *d2) -{ - do { - if (d1 == d2) - return 1; - d1 = d1->d_parent; - } while (!IS_ROOT(d1)); - return 0; -} - -/** - * gfs2_show_options - Show mount options for /proc/mounts - * @s: seq_file structure - * @mnt: vfsmount - * - * Returns: 0 on success or error code - */ - -static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) -{ - struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; - struct gfs2_args *args = &sdp->sd_args; - int lfsecs; - - if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) - seq_printf(s, ",meta"); - if (args->ar_lockproto[0]) - seq_printf(s, ",lockproto=%s", args->ar_lockproto); - if (args->ar_locktable[0]) - seq_printf(s, ",locktable=%s", args->ar_locktable); - if (args->ar_hostdata[0]) - seq_printf(s, ",hostdata=%s", args->ar_hostdata); - if (args->ar_spectator) - seq_printf(s, ",spectator"); - if (args->ar_ignore_local_fs) - seq_printf(s, ",ignore_local_fs"); - if (args->ar_localflocks) - seq_printf(s, ",localflocks"); - if (args->ar_localcaching) - seq_printf(s, ",localcaching"); - if (args->ar_debug) - seq_printf(s, ",debug"); - if (args->ar_upgrade) - seq_printf(s, ",upgrade"); - if (args->ar_posix_acl) - seq_printf(s, ",acl"); - if (args->ar_quota != GFS2_QUOTA_DEFAULT) { - char *state; - switch (args->ar_quota) { - case GFS2_QUOTA_OFF: - state = "off"; - break; - case GFS2_QUOTA_ACCOUNT: - state = "account"; - break; - case GFS2_QUOTA_ON: - state = "on"; - break; - default: - state = "unknown"; - break; - } - seq_printf(s, ",quota=%s", state); - } - if (args->ar_suiddir) - seq_printf(s, ",suiddir"); - if (args->ar_data != GFS2_DATA_DEFAULT) { - char *state; - switch (args->ar_data) { - case GFS2_DATA_WRITEBACK: - state = "writeback"; - break; - case GFS2_DATA_ORDERED: - state = "ordered"; - break; - default: - state = "unknown"; - break; - } - seq_printf(s, ",data=%s", state); - } - if (args->ar_discard) - seq_printf(s, ",discard"); - lfsecs = sdp->sd_tune.gt_log_flush_secs; - if (lfsecs != 60) - seq_printf(s, ",commit=%d", lfsecs); - return 0; -} - -/* - * We have to (at the moment) hold the inodes main lock to cover - * the gap between unlocking the shared lock on the iopen lock and - * taking the exclusive lock. I'd rather do a shared -> exclusive - * conversion on the iopen lock, but we can change that later. This - * is safe, just less efficient. - */ - -static void gfs2_delete_inode(struct inode *inode) -{ - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder gh; - int error; - - if (!test_bit(GIF_USER, &ip->i_flags)) - goto out; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (unlikely(error)) { - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - goto out; - } - - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); - error = gfs2_glock_nq(&ip->i_iopen_gh); - if (error) - goto out_truncate; - - if (S_ISDIR(inode->i_mode) && - (ip->i_diskflags & GFS2_DIF_EXHASH)) { - error = gfs2_dir_exhash_dealloc(ip); - if (error) - goto out_unlock; - } - - if (ip->i_eattr) { - error = gfs2_ea_dealloc(ip); - if (error) - goto out_unlock; - } - - if (!gfs2_is_stuffed(ip)) { - error = gfs2_file_dealloc(ip); - if (error) - goto out_unlock; - } - - error = gfs2_dinode_dealloc(ip); - if (error) - goto out_unlock; - -out_truncate: - error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); - if (error) - goto out_unlock; - /* Needs to be done before glock release & also in a transaction */ - truncate_inode_pages(&inode->i_data, 0); - gfs2_trans_end(sdp); - -out_unlock: - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) - gfs2_glock_dq(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); - gfs2_glock_dq_uninit(&gh); - if (error && error != GLR_TRYFAILED && error != -EROFS) - fs_warn(sdp, "gfs2_delete_inode: %d\n", error); -out: - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); -} - -static struct inode *gfs2_alloc_inode(struct super_block *sb) -{ - struct gfs2_inode *ip; - - ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); - if (ip) { - ip->i_flags = 0; - ip->i_gl = NULL; - } - return &ip->i_inode; -} - -static void gfs2_destroy_inode(struct inode *inode) -{ - kmem_cache_free(gfs2_inode_cachep, inode); -} - -const struct super_operations gfs2_super_ops = { - .alloc_inode = gfs2_alloc_inode, - .destroy_inode = gfs2_destroy_inode, - .write_inode = gfs2_write_inode, - .delete_inode = gfs2_delete_inode, - .put_super = gfs2_put_super, - .write_super = gfs2_write_super, - .sync_fs = gfs2_sync_fs, - .freeze_fs = gfs2_freeze, - .unfreeze_fs = gfs2_unfreeze, - .statfs = gfs2_statfs, - .remount_fs = gfs2_remount_fs, - .clear_inode = gfs2_clear_inode, - .drop_inode = gfs2_drop_inode, - .show_options = gfs2_show_options, -}; - diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 601913e0a482..40bcc37e5a70 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -7,14 +7,20 @@ * of the GNU General Public License version 2. */ +#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include #include -#include +#include +#include #include "gfs2.h" #include "incore.h" @@ -31,6 +37,183 @@ #include "super.h" #include "trans.h" #include "util.h" +#include "sys.h" +#include "eattr.h" + +#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) + +enum { + Opt_lockproto, + Opt_locktable, + Opt_hostdata, + Opt_spectator, + Opt_ignore_local_fs, + Opt_localflocks, + Opt_localcaching, + Opt_debug, + Opt_nodebug, + Opt_upgrade, + Opt_acl, + Opt_noacl, + Opt_quota_off, + Opt_quota_account, + Opt_quota_on, + Opt_quota, + Opt_noquota, + Opt_suiddir, + Opt_nosuiddir, + Opt_data_writeback, + Opt_data_ordered, + Opt_meta, + Opt_discard, + Opt_nodiscard, + Opt_commit, + Opt_error, +}; + +static const match_table_t tokens = { + {Opt_lockproto, "lockproto=%s"}, + {Opt_locktable, "locktable=%s"}, + {Opt_hostdata, "hostdata=%s"}, + {Opt_spectator, "spectator"}, + {Opt_ignore_local_fs, "ignore_local_fs"}, + {Opt_localflocks, "localflocks"}, + {Opt_localcaching, "localcaching"}, + {Opt_debug, "debug"}, + {Opt_nodebug, "nodebug"}, + {Opt_upgrade, "upgrade"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_quota_off, "quota=off"}, + {Opt_quota_account, "quota=account"}, + {Opt_quota_on, "quota=on"}, + {Opt_quota, "quota"}, + {Opt_noquota, "noquota"}, + {Opt_suiddir, "suiddir"}, + {Opt_nosuiddir, "nosuiddir"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_data_ordered, "data=ordered"}, + {Opt_meta, "meta"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, + {Opt_commit, "commit=%d"}, + {Opt_error, NULL} +}; + +/** + * gfs2_mount_args - Parse mount options + * @sdp: + * @data: + * + * Return: errno + */ + +int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) +{ + char *o; + int token; + substring_t tmp[MAX_OPT_ARGS]; + int rv; + + /* Split the options into tokens with the "," character and + process them */ + + while (1) { + o = strsep(&options, ","); + if (o == NULL) + break; + if (*o == '\0') + continue; + + token = match_token(o, tokens, tmp); + switch (token) { + case Opt_lockproto: + match_strlcpy(args->ar_lockproto, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_locktable: + match_strlcpy(args->ar_locktable, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_hostdata: + match_strlcpy(args->ar_hostdata, &tmp[0], + GFS2_LOCKNAME_LEN); + break; + case Opt_spectator: + args->ar_spectator = 1; + break; + case Opt_ignore_local_fs: + args->ar_ignore_local_fs = 1; + break; + case Opt_localflocks: + args->ar_localflocks = 1; + break; + case Opt_localcaching: + args->ar_localcaching = 1; + break; + case Opt_debug: + args->ar_debug = 1; + break; + case Opt_nodebug: + args->ar_debug = 0; + break; + case Opt_upgrade: + args->ar_upgrade = 1; + break; + case Opt_acl: + args->ar_posix_acl = 1; + break; + case Opt_noacl: + args->ar_posix_acl = 0; + break; + case Opt_quota_off: + case Opt_noquota: + args->ar_quota = GFS2_QUOTA_OFF; + break; + case Opt_quota_account: + args->ar_quota = GFS2_QUOTA_ACCOUNT; + break; + case Opt_quota_on: + case Opt_quota: + args->ar_quota = GFS2_QUOTA_ON; + break; + case Opt_suiddir: + args->ar_suiddir = 1; + break; + case Opt_nosuiddir: + args->ar_suiddir = 0; + break; + case Opt_data_writeback: + args->ar_data = GFS2_DATA_WRITEBACK; + break; + case Opt_data_ordered: + args->ar_data = GFS2_DATA_ORDERED; + break; + case Opt_meta: + args->ar_meta = 1; + break; + case Opt_discard: + args->ar_discard = 1; + break; + case Opt_nodiscard: + args->ar_discard = 0; + break; + case Opt_commit: + rv = match_int(&tmp[0], &args->ar_commit); + if (rv || args->ar_commit <= 0) { + fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_error: + default: + fs_info(sdp, "invalid mount option: %s\n", o); + return -EINVAL; + } + } + + return 0; +} /** * gfs2_jindex_free - Clear all the journal index information @@ -436,3 +619,719 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) mutex_unlock(&sdp->sd_freeze_lock); } + +/** + * gfs2_write_inode - Make sure the inode is stable on the disk + * @inode: The inode + * @sync: synchronous write flag + * + * Returns: errno + */ + +static int gfs2_write_inode(struct inode *inode, int sync) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_holder gh; + struct buffer_head *bh; + struct timespec atime; + struct gfs2_dinode *di; + int ret = 0; + + /* Check this is a "normal" inode, etc */ + if (!test_bit(GIF_USER, &ip->i_flags) || + (current->flags & PF_MEMALLOC)) + return 0; + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto do_flush; + ret = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (ret) + goto do_unlock; + ret = gfs2_meta_inode_buffer(ip, &bh); + if (ret == 0) { + di = (struct gfs2_dinode *)bh->b_data; + atime.tv_sec = be64_to_cpu(di->di_atime); + atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); + if (timespec_compare(&inode->i_atime, &atime) > 0) { + gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_dinode_out(ip, bh->b_data); + } + brelse(bh); + } + gfs2_trans_end(sdp); +do_unlock: + gfs2_glock_dq_uninit(&gh); +do_flush: + if (sync != 0) + gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + return ret; +} + +/** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_quota_cleanup(sdp); + + return error; +} + +static int gfs2_umount_recovery_wait(void *word) +{ + schedule(); + return 0; +} + +/** + * gfs2_put_super - Unmount the filesystem + * @sb: The VFS superblock + * + */ + +static void gfs2_put_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + int error; + struct gfs2_jdesc *jd; + + /* Unfreeze the filesystem, if we need to */ + + mutex_lock(&sdp->sd_freeze_lock); + if (sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + mutex_unlock(&sdp->sd_freeze_lock); + + /* No more recovery requests */ + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + smp_mb(); + + /* Wait on outstanding recovery */ +restart: + spin_lock(&sdp->sd_jindex_spin); + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) + continue; + spin_unlock(&sdp->sd_jindex_spin); + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, + gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + spin_unlock(&sdp->sd_jindex_spin); + + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_ro(sdp); + if (error) + gfs2_io_error(sdp); + } + /* At this point, we're through modifying the disk */ + + /* Release stuff */ + + iput(sdp->sd_jindex); + iput(sdp->sd_inum_inode); + iput(sdp->sd_statfs_inode); + iput(sdp->sd_rindex); + iput(sdp->sd_quota_inode); + + gfs2_glock_put(sdp->sd_rename_gl); + gfs2_glock_put(sdp->sd_trans_gl); + + if (!sdp->sd_args.ar_spectator) { + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + iput(sdp->sd_ir_inode); + iput(sdp->sd_sc_inode); + iput(sdp->sd_qc_inode); + } + + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + gfs2_clear_rgrpd(sdp); + gfs2_jindex_free(sdp); + /* Take apart glock structures and buffer lists */ + gfs2_gl_hash_clear(sdp); + /* Unmount the locking protocol */ + gfs2_lm_unmount(sdp); + + /* At this point, we're through participating in the lockspace */ + gfs2_sys_fs_del(sdp); +} + +/** + * gfs2_write_super + * @sb: the superblock + * + */ + +static void gfs2_write_super(struct super_block *sb) +{ + sb->s_dirt = 0; +} + +/** + * gfs2_sync_fs - sync the filesystem + * @sb: the superblock + * + * Flushes the log to disk. + */ + +static int gfs2_sync_fs(struct super_block *sb, int wait) +{ + sb->s_dirt = 0; + if (wait && sb->s_fs_info) + gfs2_log_flush(sb->s_fs_info, NULL); + return 0; +} + +/** + * gfs2_freeze - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static int gfs2_freeze(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + int error; + + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return -EINVAL; + + for (;;) { + error = gfs2_freeze_fs(sdp); + if (!error) + break; + + switch (error) { + case -EBUSY: + fs_err(sdp, "waiting for recovery before freeze\n"); + break; + + default: + fs_err(sdp, "error freezing FS: %d\n", error); + break; + } + + fs_err(sdp, "retrying...\n"); + msleep(1000); + } + return 0; +} + +/** + * gfs2_unfreeze - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static int gfs2_unfreeze(struct super_block *sb) +{ + gfs2_unfreeze_fs(sb->s_fs_info); + return 0; +} + +/** + * statfs_fill - fill in the sg for a given RG + * @rgd: the RG + * @sc: the sc structure + * + * Returns: 0 on success, -ESTALE if the LVB is invalid + */ + +static int statfs_slow_fill(struct gfs2_rgrpd *rgd, + struct gfs2_statfs_change_host *sc) +{ + gfs2_rgrp_verify(rgd); + sc->sc_total += rgd->rd_data; + sc->sc_free += rgd->rd_free; + sc->sc_dinodes += rgd->rd_dinodes; + return 0; +} + +/** + * gfs2_statfs_slow - Stat a filesystem using asynchronous locking + * @sdp: the filesystem + * @sc: the sc info that will be returned + * + * Any error (other than a signal) will cause this routine to fall back + * to the synchronous version. + * + * FIXME: This really shouldn't busy wait like this. + * + * Returns: errno + */ + +static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) +{ + struct gfs2_holder ri_gh; + struct gfs2_rgrpd *rgd_next; + struct gfs2_holder *gha, *gh; + unsigned int slots = 64; + unsigned int x; + int done; + int error = 0, err; + + memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); + gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); + if (!gha) + return -ENOMEM; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto out; + + rgd_next = gfs2_rgrpd_get_first(sdp); + + for (;;) { + done = 1; + + for (x = 0; x < slots; x++) { + gh = gha + x; + + if (gh->gh_gl && gfs2_glock_poll(gh)) { + err = gfs2_glock_wait(gh); + if (err) { + gfs2_holder_uninit(gh); + error = err; + } else { + if (!error) + error = statfs_slow_fill( + gh->gh_gl->gl_object, sc); + gfs2_glock_dq_uninit(gh); + } + } + + if (gh->gh_gl) + done = 0; + else if (rgd_next && !error) { + error = gfs2_glock_nq_init(rgd_next->rd_gl, + LM_ST_SHARED, + GL_ASYNC, + gh); + rgd_next = gfs2_rgrpd_get_next(rgd_next); + done = 0; + } + + if (signal_pending(current)) + error = -ERESTARTSYS; + } + + if (done) + break; + + yield(); + } + + gfs2_glock_dq_uninit(&ri_gh); + +out: + kfree(gha); + return error; +} + +/** + * gfs2_statfs_i - Do a statfs + * @sdp: the filesystem + * @sg: the sg structure + * + * Returns: errno + */ + +static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) +{ + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + + *sc = *m_sc; + sc->sc_total += l_sc->sc_total; + sc->sc_free += l_sc->sc_free; + sc->sc_dinodes += l_sc->sc_dinodes; + + spin_unlock(&sdp->sd_statfs_spin); + + if (sc->sc_free < 0) + sc->sc_free = 0; + if (sc->sc_free > sc->sc_total) + sc->sc_free = sc->sc_total; + if (sc->sc_dinodes < 0) + sc->sc_dinodes = 0; + + return 0; +} + +/** + * gfs2_statfs - Gather and return stats about the filesystem + * @sb: The superblock + * @statfsbuf: The buffer + * + * Returns: 0 on success or error code + */ + +static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_inode->i_sb; + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_statfs_change_host sc; + int error; + + if (gfs2_tune_get(sdp, gt_statfs_slow)) + error = gfs2_statfs_slow(sdp, &sc); + else + error = gfs2_statfs_i(sdp, &sc); + + if (error) + return error; + + buf->f_type = GFS2_MAGIC; + buf->f_bsize = sdp->sd_sb.sb_bsize; + buf->f_blocks = sc.sc_total; + buf->f_bfree = sc.sc_free; + buf->f_bavail = sc.sc_free; + buf->f_files = sc.sc_dinodes + sc.sc_free; + buf->f_ffree = sc.sc_free; + buf->f_namelen = GFS2_FNAMESIZE; + + return 0; +} + +/** + * gfs2_remount_fs - called when the FS is remounted + * @sb: the filesystem + * @flags: the remount flags + * @data: extra data passed in (not used right now) + * + * Returns: errno + */ + +static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_args args = sdp->sd_args; /* Default to current settings */ + struct gfs2_tune *gt = &sdp->sd_tune; + int error; + + spin_lock(>->gt_spin); + args.ar_commit = gt->gt_log_flush_secs; + spin_unlock(>->gt_spin); + error = gfs2_mount_args(sdp, &args, data); + if (error) + return error; + + /* Not allowed to change locking details */ + if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) || + strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) || + strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata)) + return -EINVAL; + + /* Some flags must not be changed */ + if (args_neq(&args, &sdp->sd_args, spectator) || + args_neq(&args, &sdp->sd_args, ignore_local_fs) || + args_neq(&args, &sdp->sd_args, localflocks) || + args_neq(&args, &sdp->sd_args, localcaching) || + args_neq(&args, &sdp->sd_args, meta)) + return -EINVAL; + + if (sdp->sd_args.ar_spectator) + *flags |= MS_RDONLY; + + if ((sb->s_flags ^ *flags) & MS_RDONLY) { + if (*flags & MS_RDONLY) + error = gfs2_make_fs_ro(sdp); + else + error = gfs2_make_fs_rw(sdp); + if (error) + return error; + } + + sdp->sd_args = args; + if (sdp->sd_args.ar_posix_acl) + sb->s_flags |= MS_POSIXACL; + else + sb->s_flags &= ~MS_POSIXACL; + spin_lock(>->gt_spin); + gt->gt_log_flush_secs = args.ar_commit; + spin_unlock(>->gt_spin); + + return 0; +} + +/** + * gfs2_drop_inode - Drop an inode (test for remote unlink) + * @inode: The inode to drop + * + * If we've received a callback on an iopen lock then its because a + * remote node tried to deallocate the inode but failed due to this node + * still having the inode open. Here we mark the link count zero + * since we know that it must have reached zero if the GLF_DEMOTE flag + * is set on the iopen glock. If we didn't do a disk read since the + * remote node removed the final link then we might otherwise miss + * this event. This check ensures that this node will deallocate the + * inode's blocks, or alternatively pass the baton on to another + * node for later deallocation. + */ + +static void gfs2_drop_inode(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + clear_nlink(inode); + } + generic_drop_inode(inode); +} + +/** + * gfs2_clear_inode - Deallocate an inode when VFS is done with it + * @inode: The VFS inode + * + */ + +static void gfs2_clear_inode(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + /* This tells us its a "real" inode and not one which only + * serves to contain an address space (see rgrp.c, meta_io.c) + * which therefore doesn't have its own glocks. + */ + if (test_bit(GIF_USER, &ip->i_flags)) { + ip->i_gl->gl_object = NULL; + gfs2_glock_put(ip->i_gl); + ip->i_gl = NULL; + if (ip->i_iopen_gh.gh_gl) { + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } + } +} + +static int is_ancestor(const struct dentry *d1, const struct dentry *d2) +{ + do { + if (d1 == d2) + return 1; + d1 = d1->d_parent; + } while (!IS_ROOT(d1)); + return 0; +} + +/** + * gfs2_show_options - Show mount options for /proc/mounts + * @s: seq_file structure + * @mnt: vfsmount + * + * Returns: 0 on success or error code + */ + +static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; + struct gfs2_args *args = &sdp->sd_args; + int lfsecs; + + if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) + seq_printf(s, ",meta"); + if (args->ar_lockproto[0]) + seq_printf(s, ",lockproto=%s", args->ar_lockproto); + if (args->ar_locktable[0]) + seq_printf(s, ",locktable=%s", args->ar_locktable); + if (args->ar_hostdata[0]) + seq_printf(s, ",hostdata=%s", args->ar_hostdata); + if (args->ar_spectator) + seq_printf(s, ",spectator"); + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); + if (args->ar_localflocks) + seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); + if (args->ar_debug) + seq_printf(s, ",debug"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); + if (args->ar_posix_acl) + seq_printf(s, ",acl"); + if (args->ar_quota != GFS2_QUOTA_DEFAULT) { + char *state; + switch (args->ar_quota) { + case GFS2_QUOTA_OFF: + state = "off"; + break; + case GFS2_QUOTA_ACCOUNT: + state = "account"; + break; + case GFS2_QUOTA_ON: + state = "on"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",quota=%s", state); + } + if (args->ar_suiddir) + seq_printf(s, ",suiddir"); + if (args->ar_data != GFS2_DATA_DEFAULT) { + char *state; + switch (args->ar_data) { + case GFS2_DATA_WRITEBACK: + state = "writeback"; + break; + case GFS2_DATA_ORDERED: + state = "ordered"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",data=%s", state); + } + if (args->ar_discard) + seq_printf(s, ",discard"); + lfsecs = sdp->sd_tune.gt_log_flush_secs; + if (lfsecs != 60) + seq_printf(s, ",commit=%d", lfsecs); + return 0; +} + +/* + * We have to (at the moment) hold the inodes main lock to cover + * the gap between unlocking the shared lock on the iopen lock and + * taking the exclusive lock. I'd rather do a shared -> exclusive + * conversion on the iopen lock, but we can change that later. This + * is safe, just less efficient. + */ + +static void gfs2_delete_inode(struct inode *inode) +{ + struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int error; + + if (!test_bit(GIF_USER, &ip->i_flags)) + goto out; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (unlikely(error)) { + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + goto out; + } + + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); + error = gfs2_glock_nq(&ip->i_iopen_gh); + if (error) + goto out_truncate; + + if (S_ISDIR(inode->i_mode) && + (ip->i_diskflags & GFS2_DIF_EXHASH)) { + error = gfs2_dir_exhash_dealloc(ip); + if (error) + goto out_unlock; + } + + if (ip->i_eattr) { + error = gfs2_ea_dealloc(ip); + if (error) + goto out_unlock; + } + + if (!gfs2_is_stuffed(ip)) { + error = gfs2_file_dealloc(ip); + if (error) + goto out_unlock; + } + + error = gfs2_dinode_dealloc(ip); + if (error) + goto out_unlock; + +out_truncate: + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + if (error) + goto out_unlock; + /* Needs to be done before glock release & also in a transaction */ + truncate_inode_pages(&inode->i_data, 0); + gfs2_trans_end(sdp); + +out_unlock: + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) + gfs2_glock_dq(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&gh); + if (error && error != GLR_TRYFAILED && error != -EROFS) + fs_warn(sdp, "gfs2_delete_inode: %d\n", error); +out: + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +static struct inode *gfs2_alloc_inode(struct super_block *sb) +{ + struct gfs2_inode *ip; + + ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); + if (ip) { + ip->i_flags = 0; + ip->i_gl = NULL; + } + return &ip->i_inode; +} + +static void gfs2_destroy_inode(struct inode *inode) +{ + kmem_cache_free(gfs2_inode_cachep, inode); +} + +const struct super_operations gfs2_super_ops = { + .alloc_inode = gfs2_alloc_inode, + .destroy_inode = gfs2_destroy_inode, + .write_inode = gfs2_write_inode, + .delete_inode = gfs2_delete_inode, + .put_super = gfs2_put_super, + .write_super = gfs2_write_super, + .sync_fs = gfs2_sync_fs, + .freeze_fs = gfs2_freeze, + .unfreeze_fs = gfs2_unfreeze, + .statfs = gfs2_statfs, + .remount_fs = gfs2_remount_fs, + .clear_inode = gfs2_clear_inode, + .drop_inode = gfs2_drop_inode, + .show_options = gfs2_show_options, +}; + From 2286dbfad1fb622ee2691537e5caaedee4618860 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:45:09 +0100 Subject: [PATCH 16/25] GFS2: Move gfs2_rmdiri into ops_inode.c Move gfs2_rmdiri() into ops_inode.c and make it static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 52 -------------------------------------------- fs/gfs2/inode.h | 2 -- fs/gfs2/ops_inode.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 54 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c03a1a384e72..9b17447a0f95 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1046,58 +1046,6 @@ fail: return ERR_PTR(error); } -/** - * gfs2_rmdiri - Remove a directory - * @dip: The parent directory of the directory to be removed - * @name: The name of the directory to be removed - * @ip: The GFS2 inode of the directory to be removed - * - * Assumes Glocks on dip and ip are held - * - * Returns: errno - */ - -int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) -{ - struct qstr dotname; - int error; - - if (ip->i_entries != 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_dir_del(dip, name); - if (error) - return error; - - error = gfs2_change_nlink(dip, -1); - if (error) - return error; - - gfs2_str2qstr(&dotname, "."); - error = gfs2_dir_del(ip, &dotname); - if (error) - return error; - - gfs2_str2qstr(&dotname, ".."); - error = gfs2_dir_del(ip, &dotname); - if (error) - return error; - - /* It looks odd, but it really should be done twice */ - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - return error; -} /* * gfs2_unlink_ok - check to see that a inode is still in a directory diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 2c3ec072d60e..6cd39284eb08 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip); extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1c70fa5168d6..5dacd647ff0d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -472,6 +472,59 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) return 0; } +/** + * gfs2_rmdiri - Remove a directory + * @dip: The parent directory of the directory to be removed + * @name: The name of the directory to be removed + * @ip: The GFS2 inode of the directory to be removed + * + * Assumes Glocks on dip and ip are held + * + * Returns: errno + */ + +static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, + struct gfs2_inode *ip) +{ + struct qstr dotname; + int error; + + if (ip->i_entries != 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(ip); + return -EIO; + } + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(dip, -1); + if (error) + return error; + + gfs2_str2qstr(&dotname, "."); + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + gfs2_str2qstr(&dotname, ".."); + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + /* It looks odd, but it really should be done twice */ + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + return error; +} + /** * gfs2_rmdir - Remove a directory * @dir: The parent directory of the directory to be removed From 536baf02f650f4547f105386878b4736fbc181e8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:48:59 +0100 Subject: [PATCH 17/25] GFS2: Move gfs2_readlinki into ops_inode.c Move gfs2_readlinki into ops_inode.c and make it static Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 58 +-------------------------------------------- fs/gfs2/inode.h | 1 - fs/gfs2/ops_inode.c | 55 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 58 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9b17447a0f95..676e750fc84c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1085,63 +1085,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, return 0; } -/** - * gfs2_readlinki - return the contents of a symlink - * @ip: the symlink's inode - * @buf: a pointer to the buffer to be filled - * @len: a pointer to the length of @buf - * - * If @buf is too small, a piece of memory is kmalloc()ed and needs - * to be freed by the caller. - * - * Returns: errno - */ - -int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) -{ - struct gfs2_holder i_gh; - struct buffer_head *dibh; - unsigned int x; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - return error; - } - - if (!ip->i_disksize) { - gfs2_consist_inode(ip); - error = -EIO; - goto out; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto out; - - x = ip->i_disksize + 1; - if (x > *len) { - *buf = kmalloc(x, GFP_NOFS); - if (!*buf) { - error = -ENOMEM; - goto out_brelse; - } - } - - memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); - *len = x; - -out_brelse: - brelse(dibh); -out: - gfs2_glock_dq_uninit(&i_gh); - return error; -} - -static int -__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { struct buffer_head *dibh; int error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 6cd39284eb08..fc9a08f45be7 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -99,7 +99,6 @@ extern struct inode *gfs2_createi(struct gfs2_holder *ghs, extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); -extern int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 5dacd647ff0d..f607f0908cff 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -937,6 +937,61 @@ out: return error; } +/** + * gfs2_readlinki - return the contents of a symlink + * @ip: the symlink's inode + * @buf: a pointer to the buffer to be filled + * @len: a pointer to the length of @buf + * + * If @buf is too small, a piece of memory is kmalloc()ed and needs + * to be freed by the caller. + * + * Returns: errno + */ + +static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) +{ + struct gfs2_holder i_gh; + struct buffer_head *dibh; + unsigned int x; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (!ip->i_disksize) { + gfs2_consist_inode(ip); + error = -EIO; + goto out; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + x = ip->i_disksize + 1; + if (x > *len) { + *buf = kmalloc(x, GFP_NOFS); + if (!*buf) { + error = -ENOMEM; + goto out_brelse; + } + } + + memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); + *len = x; + +out_brelse: + brelse(dibh); +out: + gfs2_glock_dq_uninit(&i_gh); + return error; +} + /** * gfs2_readlink - Read the value of a symlink * @dentry: the symlink From 87ec21741138bb42e7f943bb142b1d8567c10925 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:54:50 +0100 Subject: [PATCH 18/25] GFS2: Move gfs2_unlink_ok into ops_inode.c Another function which is only called from one ops_inode.c so we move it and make it static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 39 --------------------------------------- fs/gfs2/inode.h | 2 -- fs/gfs2/ops_inode.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 676e750fc84c..2f94bd723698 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1046,45 +1046,6 @@ fail: return ERR_PTR(error); } - -/* - * gfs2_unlink_ok - check to see that a inode is still in a directory - * @dip: the directory - * @name: the name of the file - * @ip: the inode - * - * Assumes that the lock on (at least) @dip is held. - * - * Returns: 0 if the parent/child relationship is correct, errno if it isn't - */ - -int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip) -{ - int error; - - if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) - return -EPERM; - - if ((dip->i_inode.i_mode & S_ISVTX) && - dip->i_inode.i_uid != current_fsuid() && - ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) - return -EPERM; - - if (IS_APPEND(&dip->i_inode)) - return -EPERM; - - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); - if (error) - return error; - - error = gfs2_dir_check(&dip->i_inode, name, ip); - if (error) - return error; - - return 0; -} - static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) { struct buffer_head *dibh; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index fc9a08f45be7..c341aaf67adb 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -96,8 +96,6 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, extern struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); -extern int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, - const struct gfs2_inode *ip); extern int gfs2_permission(struct inode *inode, int mask); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index f607f0908cff..f8bd20baf99c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -262,6 +262,44 @@ out_parent: return error; } +/* + * gfs2_unlink_ok - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, errno if it isn't + */ + +static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, + const struct gfs2_inode *ip) +{ + int error; + + if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) + return -EPERM; + + if ((dip->i_inode.i_mode & S_ISVTX) && + dip->i_inode.i_uid != current_fsuid() && + ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) + return -EPERM; + + if (IS_APPEND(&dip->i_inode)) + return -EPERM; + + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); + if (error) + return error; + + error = gfs2_dir_check(&dip->i_inode, name, ip); + if (error) + return error; + + return 0; +} + /** * gfs2_unlink - Unlink a file * @dir: The inode of the directory containing the file to unlink From e1b28aab5804aa477c33d19855d6747607a885fd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 26 May 2009 15:41:27 +0100 Subject: [PATCH 19/25] GFS2: Remove lockstruct subdir from gfs2 sysfs files The lockstruct sub directory contained two entries, both of which are duplicated elsewhere in the gfs2 sysfs files as well as being available via /proc/mounts. There is no userland program using either of them, so this patch removes them. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 41 ++++++++--------------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9f6d48b75fd2..94bd59ec54e2 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -276,25 +276,6 @@ static struct kobj_type gfs2_ktype = { .sysfs_ops = &gfs2_attr_ops, }; -/* - * display struct lm_lockstruct fields - */ - -#define LOCKSTRUCT_ATTR(name, fmt) \ -static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -{ \ - return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ -} \ -static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name) - -LOCKSTRUCT_ATTR(jid, "%u\n"); -LOCKSTRUCT_ATTR(first, "%u\n"); - -static struct attribute *lockstruct_attrs[] = { - &lockstruct_attr_jid.attr, - &lockstruct_attr_first.attr, - NULL, -}; /* * lock_module. Originally from lock_dlm @@ -397,6 +378,11 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_recover_jid_status); } +static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) +{ + return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); +} + #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) @@ -404,6 +390,7 @@ GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(jid, 0444, jid_show, NULL); GDLM_ATTR(first, 0444, lkfirst_show, NULL); GDLM_ATTR(first_done, 0444, first_done_show, NULL); GDLM_ATTR(recover, 0200, NULL, recover_store); @@ -415,7 +402,7 @@ static struct attribute *lock_module_attrs[] = { &gdlm_attr_block.attr, &gdlm_attr_withdraw.attr, &gdlm_attr_id.attr, - &lockstruct_attr_jid.attr, + &gdlm_attr_jid.attr, &gdlm_attr_first.attr, &gdlm_attr_first_done.attr, &gdlm_attr_recover.attr, @@ -558,11 +545,6 @@ static struct attribute *tune_attrs[] = { NULL, }; -static struct attribute_group lockstruct_group = { - .name = "lockstruct", - .attrs = lockstruct_attrs, -}; - static struct attribute_group args_group = { .name = "args", .attrs = args_attrs, @@ -588,13 +570,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) if (error) goto fail; - error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group); - if (error) - goto fail_reg; - error = sysfs_create_group(&sdp->sd_kobj, &args_group); if (error) - goto fail_lockstruct; + goto fail_reg; error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) @@ -611,8 +589,6 @@ fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_args: sysfs_remove_group(&sdp->sd_kobj, &args_group); -fail_lockstruct: - sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); fail_reg: kobject_put(&sdp->sd_kobj); fail: @@ -624,7 +600,6 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) { sysfs_remove_group(&sdp->sd_kobj, &tune_group); sysfs_remove_group(&sdp->sd_kobj, &args_group); - sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); kobject_put(&sdp->sd_kobj); } From f6eb53498ee8f725832f3a0fffca90566bb118a6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 26 May 2009 15:50:25 +0100 Subject: [PATCH 20/25] GFS2: Remove args subdir from gfs2 sysfs files Since we can cat /proc/mounts there is no need to have this subdirectory in the gfs2 sysfs files. In fact this does not reflect the full range of possible mount argumenmts, where as /proc/mounts does. There was only one userland user of this set of sysfs files and it will function perfectly well without these files being present (in fact that subcommand of gfs2_tool is obsolete anyway). The tune/* subdirectory is also considered mostly obsolete, but there are a few uses of this until mount arguments can be added for the last few functions for which there are no equivalents currently. However the tune/* directory is still in my sights and new code should avoid using it. Only the gfs2_quota and gfs2_tool programs are know to use tune/* at the moment. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 52 +-------------------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 94bd59ec54e2..23419dc3027b 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -411,44 +411,6 @@ static struct attribute *lock_module_attrs[] = { NULL, }; -#define ARGS_ATTR(name, fmt) \ -static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -{ \ - return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ -} \ -static struct gfs2_attr args_attr_##name = __ATTR_RO(name) - -ARGS_ATTR(lockproto, "%s\n"); -ARGS_ATTR(locktable, "%s\n"); -ARGS_ATTR(hostdata, "%s\n"); -ARGS_ATTR(spectator, "%d\n"); -ARGS_ATTR(ignore_local_fs, "%d\n"); -ARGS_ATTR(localcaching, "%d\n"); -ARGS_ATTR(localflocks, "%d\n"); -ARGS_ATTR(debug, "%d\n"); -ARGS_ATTR(upgrade, "%d\n"); -ARGS_ATTR(posix_acl, "%d\n"); -ARGS_ATTR(quota, "%u\n"); -ARGS_ATTR(suiddir, "%d\n"); -ARGS_ATTR(data, "%d\n"); - -static struct attribute *args_attrs[] = { - &args_attr_lockproto.attr, - &args_attr_locktable.attr, - &args_attr_hostdata.attr, - &args_attr_spectator.attr, - &args_attr_ignore_local_fs.attr, - &args_attr_localcaching.attr, - &args_attr_localflocks.attr, - &args_attr_debug.attr, - &args_attr_upgrade.attr, - &args_attr_posix_acl.attr, - &args_attr_quota.attr, - &args_attr_suiddir.attr, - &args_attr_data.attr, - NULL, -}; - /* * get and set struct gfs2_tune fields */ @@ -545,11 +507,6 @@ static struct attribute *tune_attrs[] = { NULL, }; -static struct attribute_group args_group = { - .name = "args", - .attrs = args_attrs, -}; - static struct attribute_group tune_group = { .name = "tune", .attrs = tune_attrs, @@ -570,13 +527,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) if (error) goto fail; - error = sysfs_create_group(&sdp->sd_kobj, &args_group); - if (error) - goto fail_reg; - error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) - goto fail_args; + goto fail_reg; error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group); if (error) @@ -587,8 +540,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); -fail_args: - sysfs_remove_group(&sdp->sd_kobj, &args_group); fail_reg: kobject_put(&sdp->sd_kobj); fail: @@ -599,7 +550,6 @@ fail: void gfs2_sys_fs_del(struct gfs2_sbd *sdp) { sysfs_remove_group(&sdp->sd_kobj, &tune_group); - sysfs_remove_group(&sdp->sd_kobj, &args_group); sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); kobject_put(&sdp->sd_kobj); } From a12af1ebe675e85831fde3c4d0908fc3b0908b7a Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Mon, 1 Jun 2009 12:30:03 -0500 Subject: [PATCH 21/25] GFS2: smbd proccess hangs with flock() call. GFS2 currently does not support mandatory flocks. An flock() call with LOCK_MAND triggers unexpected behavior because gfs2 is not checking for this lock type. This patch corrects that. Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 73b6f552f06d..841ddc979388 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -698,8 +698,8 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if (__mandatory_lock(&ip->i_inode)) - return -ENOLCK; + if (fl->fl_type & LOCK_MAND) + return -EOPNOTSUPP; if (fl->fl_type == F_UNLCK) { do_unflock(file, fl); From e09f9446b94ac64b27d37e98c1110f29d712cdad Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Jun 2009 10:07:44 +0100 Subject: [PATCH 22/25] GFS2: Remove unused variable Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 841ddc979388..73318a3ce6f1 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -694,8 +694,6 @@ static void do_unflock(struct file *file, struct file_lock *fl) static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; if (fl->fl_type & LOCK_MAND) From f6d03139d745198b434f65a28aabed524f415a4c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 5 Jun 2009 07:18:57 +0100 Subject: [PATCH 23/25] GFS2: Fix locking issue mounting gfs2meta fs This patch uses sget() to get a reference to the existing gfs2 sb when mouting the gfs2meta filesystem (in fact thats just another mount of the gfs2 filesystem with a different root and this interface is for backward compatibility). Signed-off-by: Steven Whitehouse Reported-by: Benjamin Marzinski Tested-by: Benjamin Marzinski Cc: Christoph Hellwig --- fs/gfs2/ops_fstype.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 2cd1164c88d7..9da161cbb30f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1273,9 +1273,20 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); } +static int test_meta_super(struct super_block *s, void *ptr) +{ + struct block_device *bdev = ptr; + return (bdev == s->s_bdev); +} + +static int set_meta_super(struct super_block *s, void *ptr) +{ + return -EINVAL; +} + static struct super_block *get_gfs2_sb(const char *dev_name) { - struct super_block *sb; + struct super_block *s; struct path path; int error; @@ -1283,30 +1294,27 @@ static struct super_block *get_gfs2_sb(const char *dev_name) if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); - return NULL; + return ERR_PTR(-ENOENT); } - sb = path.dentry->d_inode->i_sb; - if (sb && (sb->s_type == &gfs2_fs_type)) - atomic_inc(&sb->s_active); - else - sb = NULL; + s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, + path.dentry->d_inode->i_sb->s_bdev); path_put(&path); - return sb; + return s; } static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - struct super_block *sb = NULL; + struct super_block *s; struct gfs2_sbd *sdp; - sb = get_gfs2_sb(dev_name); - if (!sb) { + s = get_gfs2_sb(dev_name); + if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); - return -ENOENT; + return PTR_ERR(s); } - sdp = sb->s_fs_info; - mnt->mnt_sb = sb; + sdp = s->s_fs_info; + mnt->mnt_sb = s; mnt->mnt_root = dget(sdp->sd_master_dir); return 0; } From 40bc9a27e00d6c8c7e4dc2865c02d7402a950472 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 10 Jun 2009 09:09:40 +0100 Subject: [PATCH 24/25] GFS2: Fix cache coherency between truncate and O_DIRECT read If a page was partially zeroed as the result of a truncate, then it was not being correctly marked dirty. This resulted in the deleted data reappearing if the file was read back via direct I/O. Reported-by: Eric Sandeen Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1153a078920c..329763530dc0 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1012,7 +1012,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) gfs2_trans_add_bh(ip->i_gl, bh, 0); zero_user(page, offset, length); - + mark_buffer_dirty(bh); unlock: unlock_page(page); page_cache_release(page); From 003dec8913d6bebb4ecc989ec04a235cf38f5ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 10 Jun 2009 10:31:45 +0100 Subject: [PATCH 25/25] GFS2: Merge gfs2_get_sb into gfs2_get_sb_meta These don't need to be separate functions. Reported-by: Christoph Hellwig Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9da161cbb30f..f234aba36fb8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1284,9 +1284,11 @@ static int set_meta_super(struct super_block *s, void *ptr) return -EINVAL; } -static struct super_block *get_gfs2_sb(const char *dev_name) +static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { struct super_block *s; + struct gfs2_sbd *sdp; struct path path; int error; @@ -1294,21 +1296,11 @@ static struct super_block *get_gfs2_sb(const char *dev_name) if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); - return ERR_PTR(-ENOENT); + return error; } s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); - return s; -} - -static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) -{ - struct super_block *s; - struct gfs2_sbd *sdp; - - s = get_gfs2_sb(dev_name); if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); return PTR_ERR(s);