From acc546fd6108cb17f87f985e4235b68756d7b01f Mon Sep 17 00:00:00 2001 From: Abhi Das Date: Tue, 10 Nov 2015 15:07:26 -0600 Subject: [PATCH 01/19] gfs2: Automatically set GFS2_DIF_SYSTEM flag on system files When new files and directories are created inside a parent directory we automatically inherit the GFS2_DIF_SYSTEM flag (if set) and assign it to the new file/dirs. All new system files/dirs created in the metafs by, say gfs2_jadd, will have this flag set because they will have parent directories in the metafs whose GFS2_DIF_SYSTEM flag has already been set (most likely by a previous mkfs.gfs2) Signed-off-by: Abhi Das Signed-off-by: Bob Peterson --- fs/gfs2/file.c | 4 ++-- fs/gfs2/inode.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 5e425469f0c2..201282046693 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -298,9 +298,9 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) gfsflags &= ~GFS2_DIF_TOPDIR; if (gfsflags & GFS2_DIF_INHERIT_JDATA) gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); - return do_gfs2_set_flags(filp, gfsflags, ~0); + return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_SYSTEM); } - return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); + return do_gfs2_set_flags(filp, gfsflags, ~(GFS2_DIF_SYSTEM | GFS2_DIF_JDATA)); } static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 063fdfcf8275..2c05bc3d1947 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -685,6 +685,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, ip->i_entries = 2; break; } + + /* Force SYSTEM flag on all files and subdirs of a SYSTEM directory */ + if (dip->i_diskflags & GFS2_DIF_SYSTEM) + ip->i_diskflags |= GFS2_DIF_SYSTEM; + gfs2_set_inode_flags(inode); if ((GFS2_I(d_inode(sdp->sd_root_dir)) == dip) || From 6fde22426be6af261816db5941744b8d3c4c7f96 Mon Sep 17 00:00:00 2001 From: Markus 
Elfring Date: Fri, 13 Nov 2015 07:55:59 -0600 Subject: [PATCH 02/19] GFS2: Delete an unnecessary check before the function call "iput" The iput() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Bob Peterson --- fs/gfs2/ops_fstype.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index baab99b69d8a..1f9de173c4a0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -910,8 +910,7 @@ fail_qc_i: fail_ut_i: iput(sdp->sd_sc_inode); fail: - if (pn) - iput(pn); + iput(pn); return error; } From 3dd1dd8c696bdb7c8dcc9456cb23558ad1b336b8 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Thu, 12 Nov 2015 14:07:52 -0600 Subject: [PATCH 03/19] GFS2: Use rht_for_each_entry_rcu in glock_hash_walk This lockdep splat was being triggered on umount: [55715.973122] =============================== [55715.980169] [ INFO: suspicious RCU usage. ] [55715.981021] 4.3.0-11553-g8d3de01-dirty #15 Tainted: G W [55715.982353] ------------------------------- [55715.983301] fs/gfs2/glock.c:1427 suspicious rcu_dereference_protected() usage! The code it refers to is the rht_for_each_entry_safe usage in glock_hash_walk. The condition that triggers the warning is lockdep_rht_bucket_is_held(tbl, hash) which is checked in the __rcu_dereference_protected macro. The rhashtable buckets are not changed in glock_hash_walk so it's safe to rely on the rcu protection. Replace the rht_for_each_entry_safe() usage with rht_for_each_entry_rcu(), which doesn't care whether the bucket lock is held if the rcu read lock is held. 
Signed-off-by: Andrew Price Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 32e74710b1aa..430326e631dc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1417,14 +1417,14 @@ static struct shrinker glock_shrinker = { static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct rhash_head *pos, *next; + struct rhash_head *pos; const struct bucket_table *tbl; int i; rcu_read_lock(); tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) { + rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) { if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); From c8d577038449a718ad0027d1790b6ef4441715d4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 11 Nov 2015 15:00:35 -0600 Subject: [PATCH 04/19] gfs2: Extended attribute readahead When gfs2 allocates an inode and its extended attribute block next to each other at inode create time, the inode's directory entry indicates that in de_rahead. In that case, we can readahead the extended attribute block when we read in the inode. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/dir.c | 15 +++++++++++---- fs/gfs2/incore.h | 1 + fs/gfs2/meta_io.c | 27 +++++++++++++++++++++++++-- fs/gfs2/meta_io.h | 2 +- fs/gfs2/quota.c | 2 +- fs/gfs2/rgrp.c | 2 +- fs/gfs2/super.c | 1 + fs/gfs2/xattr.c | 10 +++++----- 8 files changed, 46 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index ad8a5b757cc7..c2486598fb87 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -108,7 +108,7 @@ static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head *bh; int error; - error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh); + error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, 0, &bh); if (error) return error; if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { @@ -305,7 +305,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf, BUG_ON(extlen < 1); bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); } else { - error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); + error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, 0, &bh); if (error) goto fail; } @@ -723,7 +723,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, { int error; - error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); + error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, 0, bhp); if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { /* pr_info("block num=%llu\n", leaf_no); */ error = -EIO; @@ -1560,15 +1560,22 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { + struct inode *inode; + u16 rahead; + if (IS_ERR(dent)) return ERR_CAST(dent); dtype = be16_to_cpu(dent->de_type); + rahead = be16_to_cpu(dent->de_rahead); addr = be64_to_cpu(dent->de_inum.no_addr); formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); brelse(bh); if (fail_on_exist) return ERR_PTR(-EEXIST); - return 
gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); + inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); + if (!IS_ERR(inode)) + GFS2_I(inode)->i_rahead = rahead; + return inode; } return ERR_PTR(-ENOENT); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de7b4f97ac75..77e778496903 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -402,6 +402,7 @@ struct gfs2_inode { u32 i_diskflags; u8 i_height; u8 i_depth; + u16 i_rahead; }; /* diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0e1d4be5865a..0f24828f8488 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -187,6 +187,21 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) return bh; } +static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno) +{ + struct buffer_head *bh; + + bh = gfs2_getbuf(gl, blkno, 1); + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + brelse(bh); + return; + } + bh->b_end_io = end_buffer_read_sync; + submit_bh(READA | REQ_META | REQ_PRIO, bh); +} + /** * gfs2_meta_read - Read a block from disk * @gl: The glock covering the block @@ -198,7 +213,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) */ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, - struct buffer_head **bhp) + int rahead, struct buffer_head **bhp) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct buffer_head *bh; @@ -213,11 +228,15 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); + if (rahead) + gfs2_meta_readahead(gl, blkno + 1); return 0; } bh->b_end_io = end_buffer_read_sync; get_bh(bh); submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); + if (rahead) + gfs2_meta_readahead(gl, blkno + 1); if (!(flags & DIO_WAIT)) return 0; @@ -341,8 +360,12 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, struct buffer_head *bh; int ret = 0; u32 mtype = height ? 
GFS2_METATYPE_IN : GFS2_METATYPE_DI; + int rahead = 0; - ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); + if (num == ip->i_no_addr) + rahead = ip->i_rahead; + + ret = gfs2_meta_read(gl, num, DIO_WAIT, rahead, &bh); if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { brelse(bh); ret = -EIO; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 8ca161567a93..c5086c8af5ed 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -53,7 +53,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, - struct buffer_head **bhp); + int rahead, struct buffer_head **bhp); extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3a31226531ea..e01298d922c0 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -388,7 +388,7 @@ static int bh_get(struct gfs2_quota_data *qd) error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); if (error) goto fail; - error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); + error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, 0, &bh); if (error) goto fail; error = -EIO; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c134c0462cee..ac0a65d94a7e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1158,7 +1158,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; - error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); + error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh); if (error) goto fail; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 894fb01a91da..8f94282db2fe 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1633,6 +1633,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_gl = NULL; 
ip->i_rgd = NULL; ip->i_res = NULL; + ip->i_rahead = 0; } return &ip->i_inode; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 4c096fa9e2a1..f0fe88449bd2 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -119,7 +119,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) __be64 *eablk, *end; int error; - error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &bh); + error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &bh); if (error) return error; @@ -143,7 +143,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) break; bn = be64_to_cpu(*eablk); - error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, &eabh); + error = gfs2_meta_read(ip->i_gl, bn, DIO_WAIT, 0, &eabh); if (error) break; error = ea_foreach_i(ip, eabh, ea_call, data); @@ -477,7 +477,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, return -ENOMEM; for (x = 0; x < nptrs; x++) { - error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, + error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, 0, bh + x); if (error) { while (x--) @@ -977,7 +977,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { __be64 *end; - error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, + error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &indbh); if (error) return error; @@ -1303,7 +1303,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); - error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh); + error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, 0, &indbh); if (error) return error; From 39b0555f7a1f96ecd303103df15596db49c36c65 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 13 Nov 2015 07:44:57 -0600 Subject: [PATCH 05/19] gfs2: Extended attribute readahead optimization Instead of submitting a READ_SYNC bio for the inode and a READA bio for the inode's 
extended attributes through submit_bh, submit a single READ_SYNC bio for both through submit_bio when possible. This can be more efficient on some kinds of block devices. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/meta_io.c | 83 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0f24828f8488..e137d96f1b17 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -187,19 +187,50 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) return bh; } -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno) +static void gfs2_meta_read_endio(struct bio *bio) { - struct buffer_head *bh; + struct bio_vec *bvec; + int i; - bh = gfs2_getbuf(gl, blkno, 1); - lock_buffer(bh); - if (buffer_uptodate(bh)) { - unlock_buffer(bh); - brelse(bh); - return; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + struct buffer_head *bh = page_buffers(page); + unsigned int len = bvec->bv_len; + + while (bh_offset(bh) < bvec->bv_offset) + bh = bh->b_this_page; + do { + struct buffer_head *next = bh->b_this_page; + len -= bh->b_size; + bh->b_end_io(bh, !bio->bi_error); + bh = next; + } while (bh && len); } - bh->b_end_io = end_buffer_read_sync; - submit_bh(READA | REQ_META | REQ_PRIO, bh); + bio_put(bio); +} + +/* + * Submit several consecutive buffer head I/O requests as a single bio I/O + * request. (See submit_bh_wbc.) 
+ */ +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num) +{ + struct buffer_head *bh = bhs[0]; + struct bio *bio; + int i; + + if (!num) + return; + + bio = bio_alloc(GFP_NOIO, num); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_bdev = bh->b_bdev; + for (i = 0; i < num; i++) { + bh = bhs[i]; + bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); + } + bio->bi_end_io = gfs2_meta_read_endio; + submit_bio(rw, bio); } /** @@ -216,7 +247,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int rahead, struct buffer_head **bhp) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct buffer_head *bh; + struct buffer_head *bh, *bhs[2]; + int num = 0; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { *bhp = NULL; @@ -228,18 +260,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, lock_buffer(bh); if (buffer_uptodate(bh)) { unlock_buffer(bh); - if (rahead) - gfs2_meta_readahead(gl, blkno + 1); - return 0; + flags &= ~DIO_WAIT; + } else { + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + bhs[num++] = bh; } - bh->b_end_io = end_buffer_read_sync; - get_bh(bh); - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); - if (rahead) - gfs2_meta_readahead(gl, blkno + 1); + + if (rahead) { + bh = gfs2_getbuf(gl, blkno + 1, CREATE); + + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + brelse(bh); + } else { + bh->b_end_io = end_buffer_read_sync; + bhs[num++] = bh; + } + } + + gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; + bh = *bhp; wait_on_buffer(bh); if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; From b54e9a0b92d44843f6719ae22b0f6daf5b9b23b4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 26 Oct 2015 10:40:28 -0500 Subject: [PATCH 06/19] GFS2: Extract quota data from reservations structure (revert 5407e24) This patch basically reverts the majority of patch 5407e24. 
That patch eliminated the gfs2_qadata structure in favor of just using the reservations structure. The problem with doing that is that it increases the size of the reservations structure. That is not an issue until it comes time to fold the reservations structure into the inode in memory so we know it's always there. By separating out the quota structure again, we aren't punishing the non-quota users by making all the inodes bigger, requiring more slab space. This patch creates a new slab area to allocate the quota stuff so it's managed a little more sanely. Signed-off-by: Bob Peterson --- fs/gfs2/aops.c | 2 +- fs/gfs2/bmap.c | 2 +- fs/gfs2/file.c | 10 ++--- fs/gfs2/incore.h | 13 +++--- fs/gfs2/inode.c | 18 ++++---- fs/gfs2/main.c | 11 +++++ fs/gfs2/quota.c | 105 +++++++++++++++++++++++++++++++---------------- fs/gfs2/quota.h | 2 + fs/gfs2/rgrp.c | 17 ++++++-- fs/gfs2/rgrp.h | 4 +- fs/gfs2/super.c | 2 +- fs/gfs2/util.c | 1 + fs/gfs2/util.h | 1 + 13 files changed, 125 insertions(+), 63 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 1caee0534587..93f07465e5a6 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -914,7 +914,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, failed: gfs2_trans_end(sdp); gfs2_inplace_release(ip); - if (ip->i_res->rs_qa_qd_num) + if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { gfs2_glock_dq(&m_ip->i_gh); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 61296ecbd0e2..8d46ae4fa873 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1297,7 +1297,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret) goto out; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 201282046693..de001eb27bed 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -401,7 +401,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out; 
- ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret) goto out_write_access; @@ -623,7 +623,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (!(file->f_mode & FMODE_WRITE)) return 0; - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rsqa_delete(ip, &inode->i_writecount); return 0; } @@ -703,7 +703,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; - ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret) return ret; @@ -938,7 +938,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (ret) goto out_unlock; - ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret) goto out_putw; @@ -962,7 +962,7 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, int error; struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) return (ssize_t)error; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 77e778496903..6a22f66f058d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -270,6 +270,13 @@ struct gfs2_holder { /* Number of quota types we support */ #define GFS2_MAXQUOTAS 2 +struct gfs2_qadata { /* quota allocation data */ + /* Quota stuff */ + struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS]; + struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS]; + unsigned int qa_qd_num; +}; + /* Resource group multi-block reservation, in order of appearance: Step 1. Function prepares to write, allocates a mb, sets the size hint. 
@@ -288,11 +295,6 @@ struct gfs2_blkreserv { struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ u64 rs_inum; /* Inode number for reservation */ - - /* ancillary quota stuff */ - struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS]; - struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS]; - unsigned int rs_qa_qd_num; }; /* @@ -391,6 +393,7 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ + struct gfs2_qadata *i_qadata; /* quota allocation data */ struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2c05bc3d1947..c37e6bf2958e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -601,7 +601,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; - error = gfs2_rs_alloc(dip); + error = gfs2_rsqa_alloc(dip); if (error) return error; @@ -653,7 +653,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_free_vfs_inode; ip = GFS2_I(inode); - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) goto fail_free_acls; @@ -781,7 +781,7 @@ fail_gunlock2: fail_free_inode: if (ip->i_gl) gfs2_glock_put(ip->i_gl); - gfs2_rs_delete(ip, NULL); + gfs2_rsqa_delete(ip, NULL); fail_free_acls: if (default_acl) posix_acl_release(default_acl); @@ -903,7 +903,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; - error = gfs2_rs_alloc(dip); + error = gfs2_rsqa_alloc(dip); if (error) return error; @@ -1376,7 +1376,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; - error = gfs2_rs_alloc(ndip); + error = gfs2_rsqa_alloc(ndip); if (error) return error; @@ -1863,7 
+1863,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) return error; - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) goto out; @@ -1925,7 +1925,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) return error; @@ -2007,7 +2007,7 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret == 0) ret = generic_setxattr(dentry, name, data, size, flags); gfs2_glock_dq(&gh); @@ -2048,7 +2048,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = gfs2_rs_alloc(ip); + ret = gfs2_rsqa_alloc(ip); if (ret == 0) ret = generic_removexattr(dentry, name); gfs2_glock_dq(&gh); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index fb2b42cf46b5..cde5c73c42df 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo) inode_init_once(&ip->i_inode); init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); + ip->i_qadata = NULL; ip->i_res = NULL; ip->i_hash_cache = NULL; } @@ -135,6 +136,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_quotad_cachep) goto fail; + gfs2_qadata_cachep = kmem_cache_create("gfs2_qadata", + sizeof(struct gfs2_qadata), + 0, 0, NULL); + if (!gfs2_qadata_cachep) + goto fail; + gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk", sizeof(struct gfs2_blkreserv), 0, 0, NULL); @@ -196,6 +203,9 @@ fail_lru: if (gfs2_rsrv_cachep) kmem_cache_destroy(gfs2_rsrv_cachep); + if (gfs2_qadata_cachep) + kmem_cache_destroy(gfs2_qadata_cachep); + if (gfs2_quotad_cachep) kmem_cache_destroy(gfs2_quotad_cachep); @@ -239,6 +249,7 @@ static void __exit 
exit_gfs2_fs(void) mempool_destroy(gfs2_page_pool); kmem_cache_destroy(gfs2_rsrv_cachep); + kmem_cache_destroy(gfs2_qadata_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e01298d922c0..b845efdb5e3a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -527,37 +527,70 @@ static void qdsb_put(struct gfs2_quota_data *qd) qd_put(qd); } +/** + * gfs2_qa_alloc - make sure we have a quota allocations data structure, + * if necessary + * @ip: the inode for this reservation + */ +int gfs2_qa_alloc(struct gfs2_inode *ip) +{ + int error = 0; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return 0; + + down_write(&ip->i_rw_mutex); + if (ip->i_qadata == NULL) { + ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); + if (!ip->i_qadata) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +void gfs2_qa_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_qadata) { + kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); + ip->i_qadata = NULL; + } + up_write(&ip->i_rw_mutex); +} + int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data **qd; int error; - if (ip->i_res == NULL) { - error = gfs2_rs_alloc(ip); + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return 0; + + if (ip->i_qadata == NULL) { + error = gfs2_rsqa_alloc(ip); if (error) return error; } - qd = ip->i_res->rs_qa_qd; + qd = ip->i_qadata->qa_qd; - if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) || + if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) return -EIO; - if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) - return 0; - error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd); if (error) goto out; - ip->i_res->rs_qa_qd_num++; + 
ip->i_qadata->qa_qd_num++; qd++; error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd); if (error) goto out; - ip->i_res->rs_qa_qd_num++; + ip->i_qadata->qa_qd_num++; qd++; if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) && @@ -565,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) error = qdsb_get(sdp, make_kqid_uid(uid), qd); if (error) goto out; - ip->i_res->rs_qa_qd_num++; + ip->i_qadata->qa_qd_num++; qd++; } @@ -574,7 +607,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) error = qdsb_get(sdp, make_kqid_gid(gid), qd); if (error) goto out; - ip->i_res->rs_qa_qd_num++; + ip->i_qadata->qa_qd_num++; qd++; } @@ -589,15 +622,15 @@ void gfs2_quota_unhold(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); unsigned int x; - if (ip->i_res == NULL) + if (ip->i_qadata == NULL) return; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); - for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - qdsb_put(ip->i_res->rs_qa_qd[x]); - ip->i_res->rs_qa_qd[x] = NULL; + for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { + qdsb_put(ip->i_qadata->qa_qd[x]); + ip->i_qadata->qa_qd[x] = NULL; } - ip->i_res->rs_qa_qd_num = 0; + ip->i_qadata->qa_qd_num = 0; } static int sort_qd(const void *a, const void *b) @@ -843,7 +876,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) return error; @@ -1006,20 +1039,20 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) unsigned int x; int error = 0; - error = gfs2_quota_hold(ip, uid, gid); - if (error) - return error; - if (capable(CAP_SYS_RESOURCE) || sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num, + error = gfs2_quota_hold(ip, uid, gid); + if (error) + return error; + + sort(ip->i_qadata->qa_qd, ip->i_qadata->qa_qd_num, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - for 
(x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - qd = ip->i_res->rs_qa_qd[x]; - error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]); + for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { + qd = ip->i_qadata->qa_qd[x]; + error = do_glock(qd, NO_FORCE, &ip->i_qadata->qa_qd_ghs[x]); if (error) break; } @@ -1028,7 +1061,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) set_bit(GIF_QD_LOCKED, &ip->i_flags); else { while (x--) - gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_qadata->qa_qd_ghs[x]); gfs2_quota_unhold(ip); } @@ -1082,14 +1115,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; - for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; int sync; - qd = ip->i_res->rs_qa_qd[x]; + qd = ip->i_qadata->qa_qd[x]; sync = need_sync(qd); - gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_qadata->qa_qd_ghs[x]); if (!sync) continue; @@ -1168,8 +1201,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - qd = ip->i_res->rs_qa_qd[x]; + for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { + qd = ip->i_qadata->qa_qd[x]; if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) || qid_eq(qd->qd_id, make_kqid_gid(gid)))) @@ -1217,14 +1250,16 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, { struct gfs2_quota_data *qd; unsigned int x; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON || + gfs2_assert_warn(sdp, change)) return; if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { - qd = ip->i_res->rs_qa_qd[x]; + for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { + qd = ip->i_qadata->qa_qd[x]; if 
(qid_eq(qd->qd_id, make_kqid_uid(uid)) || qid_eq(qd->qd_id, make_kqid_gid(gid))) { @@ -1635,7 +1670,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (error) return error; - error = gfs2_rs_alloc(ip); + error = gfs2_rsqa_alloc(ip); if (error) goto out_put; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index ad04b3acae2b..1940dd9cb1c7 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -18,6 +18,8 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID +extern int gfs2_qa_alloc(struct gfs2_inode *ip); +extern void gfs2_qa_delete(struct gfs2_inode *ip); extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unhold(struct gfs2_inode *ip); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ac0a65d94a7e..cb30748e7b19 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -596,10 +596,11 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } /** - * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode + * plus a quota allocations data structure, if necessary * @ip: the inode for this reservation */ -int gfs2_rs_alloc(struct gfs2_inode *ip) +int gfs2_rsqa_alloc(struct gfs2_inode *ip) { int error = 0; @@ -614,6 +615,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) } RB_CLEAR_NODE(&ip->i_res->rs_node); + error = gfs2_qa_alloc(ip); + if (error) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + out: up_write(&ip->i_rw_mutex); return error; @@ -678,12 +685,12 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) } /** - * gfs2_rs_delete - delete a multi-block reservation + * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation * @ip: The inode for this reservation * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) { 
down_write(&ip->i_rw_mutex); if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { @@ -691,6 +698,8 @@ void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; + + gfs2_qa_delete(ip); } up_write(&ip->i_rw_mutex); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index c0ab33fa3eed..06bbefaabc31 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -49,9 +49,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); -extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern int gfs2_rsqa_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8f94282db2fe..b030ca223067 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1607,7 +1607,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages_final(&inode->i_data); - gfs2_rs_delete(ip, NULL); + gfs2_rsqa_delete(ip, NULL); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 86d2035ac669..3b4819d8bdd6 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -27,6 +27,7 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly; struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; struct kmem_cache *gfs2_quotad_cachep __read_mostly; +struct kmem_cache *gfs2_qadata_cachep __read_mostly; struct kmem_cache *gfs2_rsrv_cachep 
__read_mostly; mempool_t *gfs2_page_pool __read_mostly; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index cbdcbdf39614..9edbcc94bdf6 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -149,6 +149,7 @@ extern struct kmem_cache *gfs2_inode_cachep; extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; extern struct kmem_cache *gfs2_quotad_cachep; +extern struct kmem_cache *gfs2_qadata_cachep; extern struct kmem_cache *gfs2_rsrv_cachep; extern mempool_t *gfs2_page_pool; From a097dc7e24cba7980bc5e2df461a4ef228e97e59 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 16 Jul 2015 08:28:04 -0500 Subject: [PATCH 07/19] GFS2: Make rgrp reservations part of the gfs2_inode structure Before this patch, multi-block reservation structures were allocated from a special slab. This patch folds the structure into the gfs2_inode structure. The disadvantage is that the gfs2_inode needs more memory, even when a file is opened read-only. The advantages are: (a) we don't need the special slab and the extra time it takes to allocate and deallocate from it. (b) we no longer need to worry that the structure exists for things like quota management. (c) This also allows us to remove the calls to get_write_access and put_write_access since we know the structure will exist. 
Signed-off-by: Bob Peterson --- fs/gfs2/bmap.c | 11 +++------- fs/gfs2/file.c | 15 +++++--------- fs/gfs2/incore.h | 2 +- fs/gfs2/inode.c | 5 ----- fs/gfs2/main.c | 13 ++---------- fs/gfs2/quota.c | 4 ++-- fs/gfs2/quota.h | 2 +- fs/gfs2/rgrp.c | 52 +++++++++++++----------------------------------- fs/gfs2/rgrp.h | 2 +- fs/gfs2/super.c | 7 ++++--- fs/gfs2/util.c | 1 - fs/gfs2/util.h | 1 - 12 files changed, 33 insertions(+), 82 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8d46ae4fa873..0860f0b5b3f1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -787,8 +787,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (error) goto out_rlist; - if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */ + gfs2_rs_deltree(&ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, @@ -1291,10 +1291,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) if (ret) return ret; - ret = get_write_access(inode); - if (ret) - return ret; - inode_dio_wait(inode); ret = gfs2_rsqa_alloc(ip); @@ -1307,10 +1303,9 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) goto out; } - gfs2_rs_deltree(ip->i_res); ret = do_shrink(inode, oldsize, newsize); out: - put_write_access(inode); + gfs2_rsqa_delete(ip, NULL); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index de001eb27bed..3ead27d64bf0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -336,8 +336,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; int hint = min_t(size_t, INT_MAX, blks); - if (hint > atomic_read(&ip->i_res->rs_sizehint)) - atomic_set(&ip->i_res->rs_sizehint, hint); + if (hint > atomic_read(&ip->i_res.rs_sizehint)) + atomic_set(&ip->i_res.rs_sizehint, hint); } /** @@ -397,13 
+397,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* Update file times before taking page lock */ file_update_time(vma->vm_file); - ret = get_write_access(inode); - if (ret) - goto out; - ret = gfs2_rsqa_alloc(ip); if (ret) - goto out_write_access; + goto out; gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); @@ -486,8 +482,6 @@ out_uninit: set_page_dirty(page); wait_for_stable_page(page); } -out_write_access: - put_write_access(inode); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); @@ -944,7 +938,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le ret = __gfs2_fallocate(file, mode, offset, len); if (ret) - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(&ip->i_res); + out_putw: put_write_access(inode); out_unlock: diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6a22f66f058d..25d0f12aaec5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -394,7 +394,7 @@ struct gfs2_inode { struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ struct gfs2_qadata *i_qadata; /* quota allocation data */ - struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */ + struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c37e6bf2958e..a8ce2e99cf5d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1859,10 +1859,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = get_write_access(inode); - if (error) - return error; - error = gfs2_rsqa_alloc(ip); if (error) goto out; @@ -1903,7 +1899,6 @@ out_end_trans: out_gunlock_q: gfs2_quota_unlock(ip); out: - put_write_access(inode); return error; } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 
cde5c73c42df..1d709d496364 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -42,7 +42,8 @@ static void gfs2_init_inode_once(void *foo) init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); ip->i_qadata = NULL; - ip->i_res = NULL; + memset(&ip->i_res, 0, sizeof(ip->i_res)); + RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_hash_cache = NULL; } @@ -142,12 +143,6 @@ static int __init init_gfs2_fs(void) if (!gfs2_qadata_cachep) goto fail; - gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk", - sizeof(struct gfs2_blkreserv), - 0, 0, NULL); - if (!gfs2_rsrv_cachep) - goto fail; - register_shrinker(&gfs2_qd_shrinker); error = register_filesystem(&gfs2_fs_type); @@ -200,9 +195,6 @@ fail_lru: unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); - if (gfs2_rsrv_cachep) - kmem_cache_destroy(gfs2_rsrv_cachep); - if (gfs2_qadata_cachep) kmem_cache_destroy(gfs2_qadata_cachep); @@ -248,7 +240,6 @@ static void __exit exit_gfs2_fs(void) rcu_barrier(); mempool_destroy(gfs2_page_pool); - kmem_cache_destroy(gfs2_rsrv_cachep); kmem_cache_destroy(gfs2_qadata_cachep); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b845efdb5e3a..63a72109976c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -550,10 +550,10 @@ int gfs2_qa_alloc(struct gfs2_inode *ip) return error; } -void gfs2_qa_delete(struct gfs2_inode *ip) +void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); - if (ip->i_qadata) { + if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 1940dd9cb1c7..5e47c935a515 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -19,7 +19,7 @@ struct gfs2_sbd; #define NO_GID_QUOTA_CHANGE INVALID_GID extern int gfs2_qa_alloc(struct gfs2_inode *ip); -extern void gfs2_qa_delete(struct gfs2_inode *ip); +extern void 
gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount); extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unhold(struct gfs2_inode *ip); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index cb30748e7b19..b879925ce134 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -602,28 +602,7 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rsqa_alloc(struct gfs2_inode *ip) { - int error = 0; - - down_write(&ip->i_rw_mutex); - if (ip->i_res) - goto out; - - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) { - error = -ENOMEM; - goto out; - } - - RB_CLEAR_NODE(&ip->i_res->rs_node); - error = gfs2_qa_alloc(ip); - if (error) { - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; - } - -out: - up_write(&ip->i_rw_mutex); - return error; + return gfs2_qa_alloc(ip); } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) @@ -693,15 +672,12 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); - if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { - gfs2_rs_deltree(ip->i_res); - BUG_ON(ip->i_res->rs_free); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; - - gfs2_qa_delete(ip); + if ((wcount == NULL) || (atomic_read(wcount) <= 1)) { + gfs2_rs_deltree(&ip->i_res); + BUG_ON(ip->i_res.rs_free); } up_write(&ip->i_rw_mutex); + gfs2_qa_delete(ip, wcount); } /** @@ -1465,7 +1441,7 @@ static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; - struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); @@ -1512,7 +1488,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, { struct gfs2_rbm rbm = { .rgd = rgd, }; u64 goal; - struct gfs2_blkreserv *rs = ip->i_res; + struct 
gfs2_blkreserv *rs = &ip->i_res; u32 extlen; u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; int ret; @@ -1583,7 +1559,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { + while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; n = n->rb_right; if (n == NULL) @@ -1993,7 +1969,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; - struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = &ip->i_res; int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; @@ -2122,7 +2098,7 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { - struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = &ip->i_res; if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); @@ -2276,7 +2252,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) static void gfs2_adjust_reservation(struct gfs2_inode *ip, const struct gfs2_rbm *rbm, unsigned len) { - struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; unsigned rlen; u64 block; @@ -2319,8 +2295,8 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, { u64 goal; - if (gfs2_rs_active(ip->i_res)) { - *rbm = ip->i_res->rs_rbm; + if (gfs2_rs_active(&ip->i_res)) { + *rbm = ip->i_res.rs_rbm; return; } @@ -2374,7 +2350,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(ip->i_res)) + if (gfs2_rs_active(&ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 
06bbefaabc31..66b51cf66dfa 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -78,7 +78,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, extern int gfs2_fitrim(struct file *filp, void __user *argp); /* This is how to tell if a reservation is in the rgrp tree: */ -static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) +static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) { return rs && !RB_EMPTY_NODE(&rs->rs_node); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b030ca223067..64f03c821b5d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1593,8 +1593,8 @@ out_truncate: out_unlock: /* Error path for case 1 */ - if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip->i_res); + if (gfs2_rs_active(&ip->i_res)) + gfs2_rs_deltree(&ip->i_res); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; @@ -1632,7 +1632,8 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; - ip->i_res = NULL; + memset(&ip->i_res, 0, sizeof(ip->i_res)); + RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_rahead = 0; } return &ip->i_inode; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 3b4819d8bdd6..cf645835710f 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -28,7 +28,6 @@ struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; struct kmem_cache *gfs2_quotad_cachep __read_mostly; struct kmem_cache *gfs2_qadata_cachep __read_mostly; -struct kmem_cache *gfs2_rsrv_cachep __read_mostly; mempool_t *gfs2_page_pool __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 9edbcc94bdf6..c81295f407f6 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -150,7 +150,6 @@ extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; extern struct kmem_cache *gfs2_quotad_cachep; extern struct kmem_cache *gfs2_qadata_cachep; 
-extern struct kmem_cache *gfs2_rsrv_cachep; extern mempool_t *gfs2_page_pool; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, From b58bf407ca4669a2dfcc00f5888ee719d9c34150 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 24 Jul 2015 09:45:43 -0500 Subject: [PATCH 08/19] GFS2: Reduce size of incore inode This patch makes no functional changes. Its goal is to reduce the size of the gfs2 inode in memory by rearranging structures and changing the size of some variables within the structure. Signed-off-by: Bob Peterson --- fs/gfs2/file.c | 2 +- fs/gfs2/glock.c | 10 +++++----- fs/gfs2/glock.h | 26 +++++++++++++------------- fs/gfs2/incore.h | 4 ++-- fs/gfs2/quota.c | 10 +++++----- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3ead27d64bf0..860408053c95 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1013,7 +1013,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; - int flags; + u16 flags; int error = 0; int sleeptime; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 430326e631dc..68484ef89a2c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -446,7 +446,7 @@ __acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - unsigned int lck_flags = gh ? gh->gh_flags : 0; + unsigned int lck_flags = (unsigned int)(gh ? 
gh->gh_flags : 0); int ret; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | @@ -750,7 +750,7 @@ again: * */ -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, +void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { INIT_LIST_HEAD(&gh->gh_list); @@ -774,7 +774,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, * */ -void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh) +void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh) { gh->gh_state = state; gh->gh_flags = flags; @@ -1080,7 +1080,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh) int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, - unsigned int state, int flags, struct gfs2_holder *gh) + unsigned int state, u16 flags, struct gfs2_holder *gh) { struct gfs2_glock *gl; int error; @@ -1539,7 +1539,7 @@ static const char *state2str(unsigned state) return "??"; } -static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) +static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) { char *p = buf; if (flags & LM_FLAG_TRY) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index f7cdaa8b4c83..46ab67fc16da 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -79,15 +79,15 @@ enum { * requested had acquired and released the lock. 
*/ -#define LM_FLAG_TRY 0x00000001 -#define LM_FLAG_TRY_1CB 0x00000002 -#define LM_FLAG_NOEXP 0x00000004 -#define LM_FLAG_ANY 0x00000008 -#define LM_FLAG_PRIORITY 0x00000010 -#define GL_ASYNC 0x00000040 -#define GL_EXACT 0x00000080 -#define GL_SKIP 0x00000100 -#define GL_NOCACHE 0x00000400 +#define LM_FLAG_TRY 0x0001 +#define LM_FLAG_TRY_1CB 0x0002 +#define LM_FLAG_NOEXP 0x0004 +#define LM_FLAG_ANY 0x0008 +#define LM_FLAG_PRIORITY 0x0010 +#define GL_ASYNC 0x0040 +#define GL_EXACT 0x0080 +#define GL_SKIP 0x0100 +#define GL_NOCACHE 0x0400 /* * lm_async_cb return flags @@ -183,8 +183,8 @@ extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, int create, struct gfs2_glock **glp); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - unsigned flags, struct gfs2_holder *gh); -extern void gfs2_holder_reinit(unsigned int state, unsigned flags, + u16 flags, struct gfs2_holder *gh); +extern void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh); extern void gfs2_holder_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq(struct gfs2_holder *gh); @@ -195,7 +195,7 @@ extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, - unsigned int state, int flags, + unsigned int state, u16 flags, struct gfs2_holder *gh); extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); @@ -215,7 +215,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); */ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, - unsigned int state, int flags, + unsigned int state, u16 flags, struct gfs2_holder *gh) { int error; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 25d0f12aaec5..921304e1d785 100644 --- a/fs/gfs2/incore.h 
+++ b/fs/gfs2/incore.h @@ -259,8 +259,8 @@ struct gfs2_holder { struct gfs2_glock *gh_gl; struct pid *gh_owner_pid; - unsigned int gh_state; - unsigned gh_flags; + u16 gh_flags; + u16 gh_state; int gh_error; unsigned long gh_iflags; /* HIF_... */ diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 63a72109976c..be6d9c450b22 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -620,7 +620,7 @@ out: void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int x; + u32 x; if (ip->i_qadata == NULL) return; @@ -1036,7 +1036,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; - unsigned int x; + u32 x; int error = 0; if (capable(CAP_SYS_RESOURCE) || @@ -1109,7 +1109,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qda[4]; unsigned int count = 0; - unsigned int x; + u32 x; int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) @@ -1191,7 +1191,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; s64 value, warn, limit; - unsigned int x; + u32 x; int error = 0; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ @@ -1249,7 +1249,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, kuid_t uid, kgid_t gid) { struct gfs2_quota_data *qd; - unsigned int x; + u32 x; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON || From 901c6c665b1024ea2bbabc24ba609a118459a2d8 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 11 Mar 2015 09:52:31 -0500 Subject: [PATCH 09/19] GFS2: Update master statfs buffer with sd_statfs_spin locked Before this patch, function update_statfs called gfs2_statfs_change_out to update the master statfs buffer without the sd_statfs_spin held. 
In theory, another process could call gfs2_statfs_sync, which takes the sd_statfs_spin lock and re-reads m_sc from the buffer. So there's a theoretical timing window in which one process could write the master statfs buffer, then another comes along and re-reads it, wiping out the changes. Signed-off-by: Bob Peterson --- fs/gfs2/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 64f03c821b5d..03fa155f703e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -556,6 +556,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; gfs2_trans_add_meta(l_ip->i_gl, l_bh); + gfs2_trans_add_meta(m_ip->i_gl, m_bh); spin_lock(&sdp->sd_statfs_spin); m_sc->sc_total += l_sc->sc_total; @@ -564,10 +565,8 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); memset(l_bh->b_data + sizeof(struct gfs2_dinode), 0, sizeof(struct gfs2_statfs_change)); - spin_unlock(&sdp->sd_statfs_spin); - - gfs2_trans_add_meta(m_ip->i_gl, m_bh); gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); } int gfs2_statfs_sync(struct super_block *sb, int type) From 2aba1b5b4f78d56a764b92bae58298ad3fffdc4a Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 19 May 2015 09:11:23 -0500 Subject: [PATCH 10/19] GFS2: Reintroduce a timeout in function gfs2_gl_hash_clear At some point in the past, we used to have a timeout when GFS2 was unmounting, trying to clear out its glocks. If the timeout expires, it would dump the remaining glocks to the kernel messages so that developers can debug the problem. That timeout was eliminated, probably by accident. This patch reintroduces it. 
Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 68484ef89a2c..a4ff7b56f5cd 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1506,7 +1506,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) flush_workqueue(glock_workqueue); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); - wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); + wait_event_timeout(sdp->sd_glock_wait, + atomic_read(&sdp->sd_glock_disposal) == 0, + HZ * 600); glock_hash_walk(dump_glock_func, sdp); } From 340174722929d80a107120400bab527cfc7e47f1 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 1 Dec 2015 08:30:34 -0600 Subject: [PATCH 11/19] gfs2: keep offset when splitting dir leaf blocks Currently, when gfs2 splits a directory leaf block, the dirents that need to be copied to the new leaf block are packed into the start of it. This is good for space efficiency. However, if gfs2 were to copy those dirents into the exact same offset in the new leaf block as they had in the old block, it would be able to generate a readdir cookie based on the dirent location, that would be guaranteed to be unique up well past where the current code is statistically almost guaranteed to have collisions. So, gfs2 now keeps the dirent's offset in the block the same when it copies it to the new leaf block. Signed-off-by: Benjamin Marzinski Signed-off-by: Bob Peterson --- fs/gfs2/dir.c | 71 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c2486598fb87..4ee008c6d64b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -443,6 +443,27 @@ static int gfs2_dirent_last(const struct gfs2_dirent *dent, return 0; } +/* Look for the dirent that contains the offset specified in data. 
Once we + * find that dirent, there must be space available there for the new dirent */ +static int gfs2_dirent_find_offset(const struct gfs2_dirent *dent, + const struct qstr *name, + void *ptr) +{ + unsigned required = GFS2_DIRENT_SIZE(name->len); + unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); + unsigned totlen = be16_to_cpu(dent->de_rec_len); + + if (ptr < (void *)dent || ptr >= (void *)dent + totlen) + return 0; + if (gfs2_dirent_sentinel(dent)) + actual = 0; + if (ptr < (void *)dent + actual) + return -1; + if ((void *)dent + totlen >= ptr + required) + return 1; + return -1; +} + static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) @@ -682,21 +703,17 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, prev->de_rec_len = cpu_to_be16(prev_rec_len); } -/* - * Takes a dent from which to grab space as an argument. Returns the - * newly created dent. - */ -static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, - struct gfs2_dirent *dent, - const struct qstr *name, - struct buffer_head *bh) + +static struct gfs2_dirent *do_init_dirent(struct inode *inode, + struct gfs2_dirent *dent, + const struct qstr *name, + struct buffer_head *bh, + unsigned offset) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_dirent *ndent; - unsigned offset = 0, totlen; + unsigned totlen; - if (!gfs2_dirent_sentinel(dent)) - offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); totlen = be16_to_cpu(dent->de_rec_len); BUG_ON(offset + name->len > totlen); gfs2_trans_add_meta(ip->i_gl, bh); @@ -706,16 +723,35 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, return ndent; } -static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, - struct buffer_head *bh, - const struct qstr *name) + +/* + * Takes a dent from which to grab space as an argument. Returns the + * newly created dent. 
+ */ +static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, + struct gfs2_dirent *dent, + const struct qstr *name, + struct buffer_head *bh) +{ + unsigned offset = 0; + + if (!gfs2_dirent_sentinel(dent)) + offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); + return do_init_dirent(inode, dent, name, bh, offset); +} + +static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode, + struct buffer_head *bh, + const struct qstr *name, + void *ptr) { struct gfs2_dirent *dent; dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, - gfs2_dirent_find_space, name, NULL); + gfs2_dirent_find_offset, name, ptr); if (!dent || IS_ERR(dent)) return dent; - return gfs2_init_dirent(inode, dent, name, bh); + return do_init_dirent(inode, dent, name, bh, + (unsigned)(ptr - (void *)dent)); } static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, @@ -1051,10 +1087,11 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) if (!gfs2_dirent_sentinel(dent) && be32_to_cpu(dent->de_hash) < divider) { struct qstr str; + void *ptr = ((char *)dent - obh->b_data) + nbh->b_data; str.name = (char*)(dent+1); str.len = be16_to_cpu(dent->de_name_len); str.hash = be32_to_cpu(dent->de_hash); - new = gfs2_dirent_alloc(inode, nbh, &str); + new = gfs2_dirent_split_alloc(inode, nbh, &str, ptr); if (IS_ERR(new)) { error = PTR_ERR(new); break; From 471f3db2786bc32011d6693413eb93b0c3da2579 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 1 Dec 2015 08:46:55 -0600 Subject: [PATCH 12/19] gfs2: change gfs2 readdir cookie gfs2 currently returns 31 bits of filename hash as a cookie that readdir uses for an offset into the directory. When there are a large number of directory entries, the likelihood of a collision goes up way too quickly. GFS2 will now return cookies that are guaranteed unique for a while, and then fail back to using 30 bits of filename hash. 
Specifically, the directory leaf blocks are divided up into chunks based on the minimum size of a gfs2 directory entry (48 bytes). Each entry's cookie is based off the chunk where it starts, in the linked list of leaf blocks that it hashes to (there are 131072 hash buckets). Directory entries will have unique names until they reach chunk 8192. Assuming the largest filenames possible, and the least efficient spacing possible, this new method will still be able to return unique names when the previous method has statistically more than a 99% chance of a collision. The non-unique names it falls back to are guaranteed to not collide with the unique names. unique cookies will be in this format: - 1 bit "0" to make sure the returned cookie is positive - 17 bits for the hash table index - 1 bit for the mode "0" - 13 bits for the offset non-unique cookies will be in this format: - 1 bit "0" to make sure the returned cookie is positive - 17 bits for the hash table index - 1 bit for the mode "1" - 13 more bits of the name hash Another benefit of location based cookies, is that once a directory's exhash table is fully extended (so that multiple hash table indexes do not use the same leaf blocks), gfs2 can skip sorting the directory entries until it reaches the non-unique ones, and then it only needs to sort these. This provides a significant speed up for directory reads of very large directories. The only issue is that for these cookies to continue to point to the correct entry as files are added and removed from the directory, gfs2 must keep the entries at the same offset in the leaf block when they are split (see my previous patch). This means that until all the nodes in a cluster are running with code that will split the directory leaf blocks this way, none of the nodes can use the new cookie code. To deal with this, gfs2 now has the mount option loccookie, which, if set, will make it return these new location based cookies.
This option must not be set until all nodes in the cluster are at least running this version of the kernel code, and you have guaranteed that there are no outstanding cookies required by other software, such as NFS. gfs2 uses some of the extra space at the end of the gfs2_dirent structure to store the calculated readdir cookies. This keeps us from needing to allocate a separate array to hold these values. gfs2 recomputes the cookie stored in de_cookie for every readdir call. The time it takes to do so is small, and if gfs2 expected this value to be saved on disk, the new code wouldn't work correctly on filesystems created with an earlier version of gfs2. One issue with adding de_cookie to the union in the gfs2_dirent structure is that it caused the union to align itself to a 4 byte boundary, instead of its previous 2 byte boundary. This changed the offset of de_rahead. To solve that, I pulled de_rahead out of the union, since it does not need to be there. Signed-off-by: Benjamin Marzinski Signed-off-by: Bob Peterson --- fs/gfs2/dir.c | 91 +++++++++++++++++++++++++------- fs/gfs2/incore.h | 3 ++ fs/gfs2/ops_fstype.c | 3 ++ fs/gfs2/super.c | 12 +++++ include/uapi/linux/gfs2_ondisk.h | 9 ++-- 5 files changed, 95 insertions(+), 23 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 4ee008c6d64b..6a92592304fb 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -82,6 +82,8 @@ #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) +#define GFS2_HASH_INDEX_MASK 0xffffc000 +#define GFS2_USE_HASH_FLAG 0x2000 struct qstr gfs2_qdot __read_mostly; struct qstr gfs2_qdotdot __read_mostly; @@ -1223,10 +1225,10 @@ static int compare_dents(const void *a, const void *b) int ret = 0; dent_a = *(const struct gfs2_dirent **)a; - hash_a = be32_to_cpu(dent_a->de_hash); + hash_a = dent_a->de_cookie; dent_b = *(const struct gfs2_dirent **)b; - hash_b = be32_to_cpu(dent_b->de_hash); + hash_b = dent_b->de_cookie; if (hash_a >
hash_b) ret = 1; @@ -1264,19 +1266,20 @@ static int compare_dents(const void *a, const void *b) */ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, - const struct gfs2_dirent **darr, u32 entries, - int *copied) + struct gfs2_dirent **darr, u32 entries, + u32 sort_start, int *copied) { const struct gfs2_dirent *dent, *dent_next; u64 off, off_next; unsigned int x, y; int run = 0; - sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); + if (sort_start < entries) + sort(&darr[sort_start], entries - sort_start, + sizeof(struct gfs2_dirent *), compare_dents, NULL); dent_next = darr[0]; - off_next = be32_to_cpu(dent_next->de_hash); - off_next = gfs2_disk_hash2offset(off_next); + off_next = dent_next->de_cookie; for (x = 0, y = 1; x < entries; x++, y++) { dent = dent_next; @@ -1284,8 +1287,7 @@ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, if (y < entries) { dent_next = darr[y]; - off_next = be32_to_cpu(dent_next->de_hash); - off_next = gfs2_disk_hash2offset(off_next); + off_next = dent_next->de_cookie; if (off < ctx->pos) continue; @@ -1332,6 +1334,40 @@ static void *gfs2_alloc_sort_buffer(unsigned size) return ptr; } + +static int gfs2_set_cookies(struct gfs2_sbd *sdp, struct buffer_head *bh, + unsigned leaf_nr, struct gfs2_dirent **darr, + unsigned entries) +{ + int sort_id = -1; + int i; + + for (i = 0; i < entries; i++) { + unsigned offset; + + darr[i]->de_cookie = be32_to_cpu(darr[i]->de_hash); + darr[i]->de_cookie = gfs2_disk_hash2offset(darr[i]->de_cookie); + + if (!sdp->sd_args.ar_loccookie) + continue; + offset = (char *)(darr[i]) - + (bh->b_data + gfs2_dirent_offset(bh->b_data)); + offset /= GFS2_MIN_DIRENT_SIZE; + offset += leaf_nr * sdp->sd_max_dents_per_leaf; + if (offset >= GFS2_USE_HASH_FLAG || + leaf_nr >= GFS2_USE_HASH_FLAG) { + darr[i]->de_cookie |= GFS2_USE_HASH_FLAG; + if (sort_id < 0) + sort_id = i; + continue; + } + darr[i]->de_cookie &= GFS2_HASH_INDEX_MASK; + 
darr[i]->de_cookie |= offset; + } + return sort_id; +} + + static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, int *copied, unsigned *depth, u64 leaf_no) @@ -1341,12 +1377,11 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, struct buffer_head *bh; struct gfs2_leaf *lf; unsigned entries = 0, entries2 = 0; - unsigned leaves = 0; - const struct gfs2_dirent **darr, *dent; + unsigned leaves = 0, leaf = 0, offset, sort_offset; + struct gfs2_dirent **darr, *dent; struct dirent_gather g; struct buffer_head **larr; - int leaf = 0; - int error, i; + int error, i, need_sort = 0, sort_id; u64 lfn = leaf_no; do { @@ -1362,6 +1397,11 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, brelse(bh); } while(lfn); + if (*depth < GFS2_DIR_MAX_DEPTH || !sdp->sd_args.ar_loccookie) { + need_sort = 1; + sort_offset = 0; + } + if (!entries) return 0; @@ -1375,8 +1415,8 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out; - darr = (const struct gfs2_dirent **)(larr + leaves); - g.pdent = darr; + darr = (struct gfs2_dirent **)(larr + leaves); + g.pdent = (const struct gfs2_dirent **)darr; g.offset = 0; lfn = leaf_no; @@ -1387,6 +1427,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, lf = (struct gfs2_leaf *)bh->b_data; lfn = be64_to_cpu(lf->lf_next); if (lf->lf_entries) { + offset = g.offset; entries2 += be16_to_cpu(lf->lf_entries); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_gather, NULL, &g); @@ -1404,17 +1445,26 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, goto out_free; } error = 0; + sort_id = gfs2_set_cookies(sdp, bh, leaf, &darr[offset], + be16_to_cpu(lf->lf_entries)); + if (!need_sort && sort_id >= 0) { + need_sort = 1; + sort_offset = offset + sort_id; + } larr[leaf++] = bh; } else { + larr[leaf++] 
= NULL; brelse(bh); } } while(lfn); BUG_ON(entries2 != entries); - error = do_filldir_main(ip, ctx, darr, entries, copied); + error = do_filldir_main(ip, ctx, darr, entries, need_sort ? + sort_offset : entries, copied); out_free: for(i = 0; i < leaf; i++) - brelse(larr[i]); + if (larr[i]) + brelse(larr[i]); kvfree(larr); out: return error; @@ -1520,7 +1570,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct dirent_gather g; - const struct gfs2_dirent **darr, *dent; + struct gfs2_dirent **darr, *dent; struct buffer_head *dibh; int copied = 0; int error; @@ -1544,7 +1594,7 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, /* 96 is max number of dirents which can be stuffed into an inode */ darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS); if (darr) { - g.pdent = darr; + g.pdent = (const struct gfs2_dirent **)darr; g.offset = 0; dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size, gfs2_dirent_gather, NULL, &g); @@ -1561,8 +1611,9 @@ int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, error = -EIO; goto out; } + gfs2_set_cookies(sdp, dibh, 0, darr, dip->i_entries); error = do_filldir_main(dip, ctx, darr, - dip->i_entries, &copied); + dip->i_entries, 0, &copied); out: kfree(darr); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 921304e1d785..845fb09cc606 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -562,6 +562,8 @@ struct gfs2_args { unsigned int ar_errors:2; /* errors=withdraw | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ + unsigned int ar_loccookie:1; /* use location based readdir + cookies */ int ar_commit; /* Commit interval */ int ar_statfs_quantum; /* The fast statfs interval */ int ar_quota_quantum; /* The quota interval */ @@ -689,6 +691,7 @@ struct gfs2_sbd { u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; u32 
sd_max_jheight; /* Max height of journaled file's meta tree */ u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1]; + u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */ struct gfs2_args sd_args; /* Mount arguments */ struct gfs2_tune sd_tune; /* Filesystem tuning structure */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1f9de173c4a0..7aacdf2bafd1 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -352,6 +352,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) sdp->sd_jheightsize[x] = ~0; gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); + sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_leaf)) / + GFS2_MIN_DIRENT_SIZE; return 0; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 03fa155f703e..0f3d64606e93 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -83,6 +83,8 @@ enum { Opt_nobarrier, Opt_rgrplvb, Opt_norgrplvb, + Opt_loccookie, + Opt_noloccookie, Opt_error, }; @@ -122,6 +124,8 @@ static const match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_rgrplvb, "rgrplvb"}, {Opt_norgrplvb, "norgrplvb"}, + {Opt_loccookie, "loccookie"}, + {Opt_noloccookie, "noloccookie"}, {Opt_error, NULL} }; @@ -278,6 +282,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) case Opt_norgrplvb: args->ar_rgrplvb = 0; break; + case Opt_loccookie: + args->ar_loccookie = 1; + break; + case Opt_noloccookie: + args->ar_loccookie = 0; + break; case Opt_error: default: pr_warn("invalid mount option: %s\n", o); @@ -1418,6 +1428,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",demote_interface_used"); if (args->ar_rgrplvb) seq_puts(s, ",rgrplvb"); + if (args->ar_loccookie) + seq_puts(s, ",loccookie"); return 0; } diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h index 1a763eaae0bb..7c4be7711c81 100644 --- a/include/uapi/linux/gfs2_ondisk.h +++ b/include/uapi/linux/gfs2_ondisk.h @@ -297,6 +297,8 @@ struct 
gfs2_dinode { #define GFS2_FNAMESIZE 255 #define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7) +#define GFS2_MIN_DIRENT_SIZE (GFS2_DIRENT_SIZE(1)) + struct gfs2_dirent { struct gfs2_inum de_inum; @@ -304,11 +306,12 @@ struct gfs2_dirent { __be16 de_rec_len; __be16 de_name_len; __be16 de_type; + __be16 de_rahead; union { - __u8 __pad[14]; + __u8 __pad[12]; struct { - __be16 de_rahead; - __u8 pad2[12]; + __u32 de_cookie; /* ondisk value not used */ + __u8 pad3[8]; }; }; }; From 400ac52e805bb6852e743817bc05a136e85042a9 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Wed, 9 Dec 2015 07:46:33 -0600 Subject: [PATCH 13/19] gfs2: clear journal live bit in gfs2_log_flush When gfs2 was unmounting filesystems or changing them to read-only it was clearing the SDF_JOURNAL_LIVE bit before the final log flush. This caused a race. If an inode glock got demoted in the gap between clearing the bit and the shutdown flush, it would be unable to reserve log space to clear out the active items list in inode_go_sync, causing an error in inode_go_inval because the glock was still dirty. To solve this, the SDF_JOURNAL_LIVE bit is now cleared inside the shutdown log flush. This means that, because of the locking on the log blocks, either inode_go_sync will be able to reserve space to clean the glock before the shutdown flush, or the shutdown flush will clean the glock itself, before inode_go_sync fails to reserve the space. Either way, the glock will be clean before inode_go_inval. 
Signed-off-by: Benjamin Marzinski Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 3 +++ fs/gfs2/super.c | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 536e7a6252cd..0ff028c15199 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -716,6 +716,9 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, } trace_gfs2_log_flush(sdp, 1); + if (type == SHUTDOWN_FLUSH) + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + sdp->sd_log_flush_head = sdp->sd_log_head; sdp->sd_log_flush_wrapped = 0; tr = sdp->sd_log_tr; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0f3d64606e93..1e7da3886254 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -851,10 +851,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); - down_write(&sdp->sd_log_flush_lock); - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - up_write(&sdp->sd_log_flush_lock); - gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); From 86d067a797d4e8546a7c92b985f31e8cd3ec39ad Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 7 Dec 2015 15:10:42 -0600 Subject: [PATCH 14/19] GFS2: Wait for iopen glock dequeues This patch changes every glock_dq for iopen glocks into a dq_wait. This makes sure that iopen glocks do not outlive the inode itself. In turn, that ensures that anyone trying to unlink the glock will be able to find the inode when it receives a remote iopen callback. 
Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/inode.c | 3 ++- fs/gfs2/super.c | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a8ce2e99cf5d..bf2af049359b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -191,7 +191,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, fail_refresh: ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); fail_iopen: if (io_gl) gfs2_glock_put(io_gl); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 1e7da3886254..4f0708f7bf6c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1533,7 +1533,8 @@ static void gfs2_evict_inode(struct inode *inode) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); goto out; } @@ -1605,7 +1606,7 @@ out_unlock: if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); + gfs2_glock_dq_wait(&ip->i_iopen_gh); } gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); @@ -1626,7 +1627,8 @@ out: if (ip->i_iopen_gh.gh_gl) { ip->i_iopen_gh.gh_gl->gl_object = NULL; ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); } } From ee530beafeca9826b2086eab053312b182c09669 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 7 Dec 2015 15:13:28 -0600 Subject: [PATCH 15/19] GFS2: Truncate address space mapping when deleting an inode In function gfs2_delete_inode() we write and flush the mapping for a glock, among other things. 
We truncate the mapping for the inode, but we never truncate the mapping for the glock. This patch makes it also truncate the metamapping. This avoids cases where the glock is reused by another process that is trying to recreate an inode in its place using the same block. Signed-off-by: Bob Peterson Acked-by: Steven Whitehouse --- fs/gfs2/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4f0708f7bf6c..8f960a51a9a0 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1519,6 +1519,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + struct address_space *metamapping; int error; if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { @@ -1583,8 +1584,8 @@ static void gfs2_evict_inode(struct inode *inode) out_truncate: gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); + metamapping = gfs2_glock2aspace(ip->i_gl); if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { - struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); filemap_fdatawrite(metamapping); filemap_fdatawait(metamapping); } @@ -1597,6 +1598,7 @@ out_truncate: goto out_unlock; /* Needs to be done before glock release & also in a transaction */ truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages(metamapping, 0); gfs2_trans_end(sdp); out_unlock: From 783013c0f5c7263a31703b15aeebbac279b4d4fe Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 4 Dec 2015 10:19:14 -0600 Subject: [PATCH 16/19] GFS2: Release iopen glock in gfs2_create_inode error cases Some error cases in gfs2_create_inode were not unlocking the iopen glock, getting the reference count off. This adds the proper unlock. The error logic in function gfs2_create_inode was also convoluted, so this patch simplifies it. It also takes care of a bug in which gfs2_qa_delete() was not called in an error case. 
Signed-off-by: Bob Peterson --- fs/gfs2/inode.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index bf2af049359b..87fa5992e12a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -594,7 +594,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error, free_vfs_inode = 0; + int error, free_vfs_inode = 1; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; @@ -651,7 +651,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) - goto fail_free_vfs_inode; + goto fail_gunlock; ip = GFS2_I(inode); error = gfs2_rsqa_alloc(ip); @@ -739,6 +739,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_set_iop(inode); insert_inode_hash(inode); + free_vfs_inode = 0; /* After this point, the inode is no longer + considered free. Any failures need to undo + the gfs2 structures. 
*/ if (default_acl) { error = gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); @@ -772,11 +775,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, return error; fail_gunlock3: - gfs2_glock_dq_uninit(ghs + 1); - if (ip->i_gl) - gfs2_glock_put(ip->i_gl); - goto fail_gunlock; - + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_glock_put(io_gl); fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); fail_free_inode: @@ -788,8 +788,6 @@ fail_free_acls: posix_acl_release(default_acl); if (acl) posix_acl_release(acl); -fail_free_vfs_inode: - free_vfs_inode = 1; fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); From 5ea31bc0a6524b4fee8dc9ae8005d4a114a79812 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 4 Dec 2015 12:57:00 -0600 Subject: [PATCH 17/19] GFS2: Always use iopen glock for gl_deletes Before this patch, when function try_rgrp_unlink queued a glock for delete_work to reclaim the space, it used the inode glock to do so. That's different from the iopen callback which uses the iopen glock for the same purpose. We should be consistent and always use the iopen glock. This may also save us reference counting problems with the inode glock, since clear_glock does an extra glock_put() for the inode glock. 
Signed-off-by: Bob Peterson --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b879925ce134..07c0265aa195 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1789,7 +1789,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip continue; *last_unlinked = block; - error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl); if (error) continue; From 6cc4b6e801c725321e9f63ca7c2d00af8df24699 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 4 Dec 2015 13:04:34 -0600 Subject: [PATCH 18/19] GFS2: Don't do glock put on when inode creation fails Currently the error path of function gfs2_inode_lookup calls function gfs2_glock_put corresponding to an earlier call to gfs2_glock_get for the inode glock. That's wrong because the error path also calls iget_failed() which eventually calls iput, which eventually calls gfs2_evict_inode, which does another gfs2_glock_put. This double-put can cause the glock reference count to get off. Signed-off-by: Bob Peterson --- fs/gfs2/inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 87fa5992e12a..009b551a5d8c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -198,7 +198,6 @@ fail_iopen: gfs2_glock_put(io_gl); fail_put: ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); fail: iget_failed(inode); return ERR_PTR(error); From a93a99838248bdab49db2eaac00236847670bc7f Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 22 Dec 2015 08:06:08 -0600 Subject: [PATCH 19/19] gfs2: fix flock panic issue Commit 4f6563677ae8 ("Move locks API users to locks_lock_inode_wait()") moved flock/posix lock identify code to locks_lock_inode_wait(), but failed to set fl_flags to FL_FLOCK, which causes a kernel panic in locks_lock_inode_wait(). 
Fixes: 4f6563677ae8 ("Move locks API users to locks_lock_inode_wait()") Signed-off-by: Junxiao Bi Signed-off-by: Bob Peterson --- fs/gfs2/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 860408053c95..7412863cda1e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1027,7 +1027,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) if (fl_gh->gh_state == state) goto out; locks_lock_file_wait(file, - &(struct file_lock){.fl_type = F_UNLCK}); + &(struct file_lock) { + .fl_type = F_UNLCK, + .fl_flags = FL_FLOCK + }); gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else {