diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 2524807ee070..9d5eecb123de 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -86,19 +86,6 @@ int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) char *data; const char *name = gfs2_acl_name(type); - if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) - return -E2BIG; - - if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; - - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - if (mode != inode->i_mode) - mark_inode_dirty(inode); - } - if (acl) { len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); if (len == 0) @@ -129,6 +116,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) struct gfs2_holder gh; bool need_unlock = false; int ret; + umode_t mode; + + if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) + return -E2BIG; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -140,7 +131,20 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) return ret; need_unlock = true; } + + mode = inode->i_mode; + if (type == ACL_TYPE_ACCESS && acl) { + ret = posix_acl_update_mode(inode, &mode, &acl); + if (ret) + goto unlock; + } + ret = __gfs2_set_acl(inode, acl, type); + if (!ret && mode != inode->i_mode) { + inode->i_mode = mode; + mark_inode_dirty(inode); + } +unlock: if (need_unlock) gfs2_glock_dq_uninit(&gh); return ret; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ed7a2e252ad8..68ed06962537 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -234,7 +234,19 @@ out: static int gfs2_writepages(struct address_space *mapping, struct writeback_control *wbc) { - return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); + struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); + int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); + + /* + * Even if we didn't write any pages here, we might still be holding + * dirty pages in the ail. We forcibly flush the ail because we don't + * want balance_dirty_pages() to loop indefinitely trying to write out + * pages held in the ail that it can't find. + */ + if (ret == 0) + set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); + + return ret; } /** diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 9fa3aef9a5b3..3dd0cceefa43 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -291,8 +291,9 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, - rabh); + submit_bh(REQ_OP_READ, + REQ_RAHEAD | REQ_META | REQ_PRIO, + rabh); continue; } unlock_buffer(rabh); @@ -1103,8 +1104,15 @@ static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, while (true) { ptr = metapointer(h, mp); - if (*ptr) /* if we have a non-null pointer */ + if (*ptr) { /* if we have a non-null pointer */ + /* Now zero the metapath after the current height. */ + h++; + if (h < GFS2_MAX_META_HEIGHT) + memset(&mp->mp_list[h], 0, + (GFS2_MAX_META_HEIGHT - h) * + sizeof(mp->mp_list[0])); return true; + } if (mp->mp_list[h] < ptrs) mp->mp_list[h]++; @@ -1120,6 +1128,13 @@ enum dealloc_states { DEALLOC_DONE = 3, /* process complete */ }; +static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h) +{ + if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0]))) + return false; + return true; +} + /** * trunc_dealloc - truncate a file down to a desired size * @ip: inode to truncate @@ -1197,8 +1212,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) /* If we're truncating to a non-zero size and the mp is at the beginning of file for the strip height, we need to preserve the first metadata pointer. */ - preserve1 = (newsize && - (mp.mp_list[mp_h] == nbof[mp_h])); + preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h)); bh = mp.mp_bh[mp_h]; gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 5ee2e2f8576c..06a0d1947c77 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1513,7 +1513,9 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh); + submit_bh(REQ_OP_READ, + REQ_RAHEAD | REQ_META | REQ_PRIO, + bh); continue; } brelse(bh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index c2062a108d19..bb48074be019 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1030,8 +1030,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); - gl = fl_gh->gh_gl; - if (gl) { + if (gfs2_holder_initialized(fl_gh)) { if (fl_gh->gh_state == state) goto out; locks_lock_file_wait(file, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c38ab6c81898..98e845b7841b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -71,7 +72,7 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT) -static struct rhashtable_params ht_parms = { +static const struct rhashtable_params ht_parms = { .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, .key_len = offsetofend(struct lm_lockname, ln_type), .key_offset = offsetof(struct gfs2_glock, gl_name), @@ -80,6 +81,49 @@ static struct rhashtable_params ht_parms = { static struct rhashtable gl_hash_table; +#define GLOCK_WAIT_TABLE_BITS 12 +#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS) +static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned; + +struct wait_glock_queue { + struct lm_lockname *name; + wait_queue_entry_t wait; +}; + +static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, + int sync, void *key) +{ + struct wait_glock_queue *wait_glock = + container_of(wait, struct wait_glock_queue, wait); + struct lm_lockname *wait_name = wait_glock->name; + struct lm_lockname *wake_name = key; + + if (wake_name->ln_sbd != wait_name->ln_sbd || + wake_name->ln_number != wait_name->ln_number || + wake_name->ln_type != wait_name->ln_type) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + +static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) +{ + u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0); + + return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); +} + +/** + * wake_up_glock - Wake up waiters on a glock + * @gl: the glock + */ +static void wake_up_glock(struct gfs2_glock *gl) +{ + wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name); + + if (waitqueue_active(wq)) + __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name); +} + static void gfs2_glock_dealloc(struct rcu_head *rcu) { struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); @@ -96,6 +140,9 @@ void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); + smp_mb(); + wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); @@ -107,7 +154,7 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -static void gfs2_glock_hold(struct gfs2_glock *gl) +void gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); lockref_get(&gl->gl_lockref); @@ -150,6 +197,9 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { + if (!(gl->gl_ops->go_flags & GLOF_LRU)) + return; + spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); @@ -191,13 +241,20 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); - rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } +/* + * Cause the glock to be put in work queue context. + */ +void gfs2_glock_queue_put(struct gfs2_glock *gl) +{ + gfs2_glock_queue_work(gl, 0); +} + /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put @@ -676,6 +733,40 @@ static void glock_work_func(struct work_struct *work) spin_unlock(&gl->gl_lockref.lock); } +static struct gfs2_glock *find_insert_glock(struct lm_lockname *name, + struct gfs2_glock *new) +{ + struct wait_glock_queue wait; + wait_queue_head_t *wq = glock_waitqueue(name); + struct gfs2_glock *gl; + + wait.name = name; + init_wait(&wait.wait); + wait.wait.func = glock_wake_function; + +again: + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + rcu_read_lock(); + if (new) { + gl = rhashtable_lookup_get_insert_fast(&gl_hash_table, + &new->gl_node, ht_parms); + if (IS_ERR(gl)) + goto out; + } else { + gl = rhashtable_lookup_fast(&gl_hash_table, + name, ht_parms); + } + if (gl && !lockref_get_not_dead(&gl->gl_lockref)) { + rcu_read_unlock(); + schedule(); + goto again; + } +out: + rcu_read_unlock(); + finish_wait(wq, &wait.wait); + return gl; +} + /** * gfs2_glock_get() - Get a glock, or create one if one doesn't exist * @sdp: The GFS2 superblock @@ -702,15 +793,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct kmem_cache *cachep; int ret = 0; - rcu_read_lock(); - gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); - if (gl && !lockref_get_not_dead(&gl->gl_lockref)) - gl = NULL; - rcu_read_unlock(); - - *glp = gl; - if (gl) + gl = find_insert_glock(&name, NULL); + if (gl) { + *glp = gl; return 0; + } if (!create) return -ENOENT; @@ -764,10 +851,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->writeback_index = 0; } -again: - rcu_read_lock(); - tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node, - ht_parms); + tmp = find_insert_glock(&name, gl); if (!tmp) { *glp = gl; goto out; @@ -776,13 +860,7 @@ again: ret = PTR_ERR(tmp); goto out_free; } - if (lockref_get_not_dead(&tmp->gl_lockref)) { - *glp = tmp; - goto out_free; - } - rcu_read_unlock(); - cond_resched(); - goto again; + *glp = tmp; out_free: kfree(gl->gl_lksb.sb_lvbptr); @@ -790,7 +868,6 @@ out_free: atomic_dec(&sdp->sd_glock_disposal); out: - rcu_read_unlock(); return ret; } @@ -1473,14 +1550,15 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) do { gl = ERR_PTR(rhashtable_walk_start(&iter)); - if (gl) - continue; + if (IS_ERR(gl)) + goto walk_stop; while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) - if ((gl->gl_name.ln_sbd == sdp) && + if (gl->gl_name.ln_sbd == sdp && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); +walk_stop: rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); @@ -1803,7 +1881,7 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) int __init gfs2_glock_init(void) { - int ret; + int i, ret; ret = rhashtable_init(&gl_hash_table, &ht_parms); if (ret < 0) @@ -1832,6 +1910,9 @@ int __init gfs2_glock_init(void) return ret; } + for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) + init_waitqueue_head(glock_wait_table + i); + return 0; } @@ -1860,6 +1941,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi) } static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; @@ -1892,6 +1974,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, } static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) + __releases(RCU) { struct gfs2_glock_iter *gi = seq->private; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 9ad4a6ac6c84..5e12220cc0c2 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -13,6 +13,7 @@ #include #include #include "incore.h" +#include "util.h" /* Options for hostdata parser */ @@ -181,7 +182,9 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); +extern void gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); +extern void gfs2_glock_queue_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh); extern void gfs2_holder_reinit(unsigned int state, u16 flags, @@ -257,11 +260,44 @@ static inline bool gfs2_holder_initialized(struct gfs2_holder *gh) return gh->gh_gl; } +/** + * glock_set_object - set the gl_object field of a glock + * @gl: the glock + * @object: the object + */ static inline void glock_set_object(struct gfs2_glock *gl, void *object) { spin_lock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) + gfs2_dump_glock(NULL, gl); gl->gl_object = object; spin_unlock(&gl->gl_lockref.lock); } +/** + * glock_clear_object - clear the gl_object field of a glock + * @gl: the glock + * @object: the object + * + * I'd love to similarly add this: + * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) + * gfs2_dump_glock(NULL, gl); + * Unfortunately, that's not possible because as soon as gfs2_delete_inode + * frees the block in the rgrp, another process can reassign it for an I_NEW + * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. + * That means gfs2_delete_inode may subsequently try to call this function + * for a glock that's already pointing to a brand new inode. If we clear the + * new inode's gl_object, we'll introduce metadata corruption. Function + * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also + * tries to clear gl_object, so it's more than just gfs2_delete_inode. + * + */ +static inline void glock_clear_object(struct gfs2_glock *gl, void *object) +{ + spin_lock(&gl->gl_lockref.lock); + if (gl->gl_object == object) + gl->gl_object = NULL; + spin_unlock(&gl->gl_lockref.lock); +} + #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 5e69636d4dd3..dac6559e2195 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -329,32 +329,6 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl) return 1; } -/** - * gfs2_set_nlink - Set the inode's link count based on on-disk info - * @inode: The inode in question - * @nlink: The link count - * - * If the link count has hit zero, it must never be raised, whatever the - * on-disk inode might say. When new struct inodes are created the link - * count is set to 1, so that we can safely use this test even when reading - * in on disk information for the first time. - */ - -static void gfs2_set_nlink(struct inode *inode, u32 nlink) -{ - /* - * We will need to review setting the nlink count here in the - * light of the forthcoming ro bind mount work. This is a reminder - * to do that. - */ - if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { - if (nlink == 0) - clear_nlink(inode); - else - set_nlink(inode, nlink); - } -} - static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { const struct gfs2_dinode *str = buf; @@ -376,7 +350,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); - gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); + set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); @@ -470,7 +444,7 @@ static int inode_go_lock(struct gfs2_holder *gh) (gh->gh_state == LM_ST_EXCLUSIVE)) { spin_lock(&sdp->sd_trunc_lock); if (list_empty(&ip->i_trunc_list)) - list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); + list_add(&ip->i_trunc_list, &sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); wake_up(&sdp->sd_quota_wait); return 1; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 73fce76e67ee..6e18e9793ec4 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -606,6 +606,7 @@ enum { SDF_NOJOURNALID = 6, SDF_RORECOVERY = 7, /* read only recovery */ SDF_SKIP_DLM_UNLOCK = 8, + SDF_FORCE_AIL_FLUSH = 9, }; enum gfs2_freeze_state { @@ -816,6 +817,7 @@ struct gfs2_sbd { atomic_t sd_log_in_flight; struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; + int sd_log_error; atomic_t sd_reserving_log; wait_queue_head_t sd_reserving_log_wait; @@ -831,7 +833,7 @@ struct gfs2_sbd { atomic_t sd_freeze_state; struct mutex sd_freeze_mutex; - char sd_fsname[GFS2_FSNAME_LEN]; + char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; char sd_proto_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index acca501f8110..863749e29bf9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -109,7 +109,7 @@ static void gfs2_set_iop(struct inode *inode) * @no_addr: The inode number * @no_formal_ino: The inode generation number * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; - * GFS2_BLKST_FREE do indicate not to verify) + * GFS2_BLKST_FREE to indicate not to verify) * * If @type is DT_UNKNOWN, the inode type is fetched from disk. * @@ -145,7 +145,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail; flush_delayed_work(&ip->i_gl->gl_work); - glock_set_object(ip->i_gl, ip); error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) @@ -170,11 +169,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, } } + glock_set_object(ip->i_gl, ip); set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail_put; - flush_delayed_work(&ip->i_iopen_gh.gh_gl->gl_work); glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; @@ -202,14 +201,14 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, fail_refresh: ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - glock_set_object(ip->i_iopen_gh.gh_gl, NULL); + glock_clear_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_put: if (io_gl) gfs2_glock_put(io_gl); + glock_clear_object(ip->i_gl, ip); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); - glock_set_object(ip->i_gl, NULL); fail: iget_failed(inode); return ERR_PTR(error); @@ -706,8 +705,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) goto fail_free_inode; - + flush_delayed_work(&ip->i_gl->gl_work); glock_set_object(ip->i_gl, ip); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); if (error) goto fail_free_inode; @@ -775,14 +775,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, return error; fail_gunlock3: + glock_clear_object(io_gl, ip); gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put(io_gl); fail_gunlock2: if (io_gl) clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); fail_free_inode: - if (ip->i_gl) + if (ip->i_gl) { + glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); + } gfs2_rsqa_delete(ip, NULL); fail_free_acls: if (default_acl) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0515f0a68637..65f33a0ac190 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -23,8 +23,6 @@ #include "sys.h" #include "trace_gfs2.h" -extern struct workqueue_struct *gfs2_control_wq; - /** * gfs2_update_stats - Update time based stats * @mv: Pointer to mean/variance structure to update @@ -1059,6 +1057,7 @@ static void free_recover_size(struct lm_lockstruct *ls) ls->ls_recover_submit = NULL; ls->ls_recover_result = NULL; ls->ls_recover_size = 0; + ls->ls_lvb_bits = NULL; } /* dlm calls before it does lock recovery */ @@ -1175,7 +1174,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, spin_unlock(&ls->ls_recover_spin); } -const struct dlm_lockspace_ops gdlm_lockspace_ops = { +static const struct dlm_lockspace_ops gdlm_lockspace_ops = { .recover_prep = gdlm_recover_prep, .recover_slot = gdlm_recover_slot, .recover_done = gdlm_recover_done, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9a624f694400..f72c44231406 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -898,6 +898,10 @@ static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) { unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free); + + if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags)) + return 1; + return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >= atomic_read(&sdp->sd_log_thresh2); } @@ -919,6 +923,15 @@ int gfs2_logd(void *data) while (!kthread_should_stop()) { + /* Check for errors writing to the journal */ + if (sdp->sd_log_error) { + gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: error %d: " + "withdrawing the file system to " + "prevent further damage.\n", + sdp->sd_fsname, sdp->sd_log_error); + } + did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3010f9edd177..7dabbe721dba 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -207,8 +207,11 @@ static void gfs2_end_log_write(struct bio *bio) struct page *page; int i; - if (bio->bi_status) - fs_err(sdp, "Error %d writing to log\n", bio->bi_status); + if (bio->bi_status) { + fs_err(sdp, "Error %d writing to journal, jid=%u\n", + bio->bi_status, sdp->sd_jdesc->jd_jid); + wake_up(&sdp->sd_logd_waitq); + } bio_for_each_segment_all(bvec, bio, i) { page = bvec->bv_page; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index fabe1614f879..61ef6c9be816 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -419,8 +419,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { brelse(bh); ret = -EIO; + } else { + *bhp = bh; } - *bhp = bh; return ret; } @@ -452,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh); + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; @@ -461,7 +462,9 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh); + ll_rw_block(REQ_OP_READ, + REQ_RAHEAD | REQ_META | REQ_PRIO, + 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e76058d34b74..c0a4b3778f3f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1113,7 +1113,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent return error; } - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); + snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); error = gfs2_sys_fs_add(sdp); /* @@ -1159,10 +1159,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent } if (sdp->sd_args.ar_spectator) - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", + snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.s", sdp->sd_table_name); else - snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", + snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s.%u", sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); error = init_inodes(sdp, DO); @@ -1388,7 +1388,6 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); - gfs2_delete_debugfs_file(sdp); free_percpu(sdp->sd_lkstats); kill_block_super(sb); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c2ca9566b764..e647938432bd 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, if (PageUptodate(page)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; @@ -1474,8 +1474,11 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) { if (error == 0 || error == -EROFS) return; - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); + sdp->sd_log_error = error; + wake_up(&sdp->sd_logd_waitq); + } } static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 836e38ba5d0a..95b2a57ded33 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -705,8 +705,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - glock_set_object(gl, NULL); - gfs2_glock_add_to_lru(gl); + glock_clear_object(gl, rgd); gfs2_glock_put(gl); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fdedec379b78..769841185ce5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -924,6 +924,7 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + gfs2_delete_debugfs_file(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); @@ -943,9 +944,9 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp) + if (wait) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); - return 0; + return sdp->sd_log_error; } void gfs2_freeze_func(struct work_struct *work) @@ -1295,7 +1296,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) * gfs2_drop_inode - Drop an inode (test for remote unlink) * @inode: The inode to drop * - * If we've received a callback on an iopen lock then its because a + * If we've received a callback on an iopen lock then it's because a * remote node tried to deallocate the inode but failed due to this node * still having the inode open. Here we mark the link count zero * since we know that it must have reached zero if the GLF_DEMOTE flag @@ -1317,6 +1318,23 @@ static int gfs2_drop_inode(struct inode *inode) if (test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); } + + /* + * When under memory pressure when an inode's link count has dropped to + * zero, defer deleting the inode to the delete workqueue. This avoids + * calling into DLM under memory pressure, which can deadlock. + */ + if (!inode->i_nlink && + unlikely(current->flags & PF_MEMALLOC) && + gfs2_holder_initialized(&ip->i_iopen_gh)) { + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + + gfs2_glock_hold(gl); + if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) + gfs2_glock_queue_put(gl); + return false; + } + return generic_drop_inode(inode); } @@ -1500,6 +1518,22 @@ out_qs: return error; } +/** + * gfs2_glock_put_eventually + * @gl: The glock to put + * + * When under memory pressure, trigger a deferred glock put to make sure we + * won't call into DLM and deadlock. Otherwise, put the glock directly. + */ + +static void gfs2_glock_put_eventually(struct gfs2_glock *gl) +{ + if (current->flags & PF_MEMALLOC) + gfs2_glock_queue_put(gl); + else + gfs2_glock_put(gl); +} + /** * gfs2_evict_inode - Remove an inode from cache * @inode: The inode to evict @@ -1544,9 +1578,14 @@ static void gfs2_evict_inode(struct inode *inode) goto alloc_failed; } + /* Deletes should never happen under memory pressure anymore. */ + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) + goto out; + /* Must not read inode block until block type has been verified */ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { + glock_clear_object(ip->i_iopen_gh.gh_gl, ip); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; @@ -1562,6 +1601,12 @@ static void gfs2_evict_inode(struct inode *inode) goto out_truncate; } + /* + * The inode may have been recreated in the meantime. + */ + if (inode->i_nlink) + goto out_truncate; + alloc_failed: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { @@ -1595,6 +1640,11 @@ alloc_failed: goto out_unlock; } + /* We're about to clear the bitmap for the dinode, but as soon as we + do, gfs2_create_inode can create another inode at the same block + location and try to set gl_object again. We clear gl_object here so + that subsequent inode creates don't see an old gl_object. */ + glock_clear_object(ip->i_gl, ip); error = gfs2_dinode_dealloc(ip); goto out_unlock; @@ -1623,14 +1673,17 @@ out_unlock: gfs2_rs_deltree(&ip->i_res); if (gfs2_holder_initialized(&ip->i_iopen_gh)) { + glock_clear_object(ip->i_iopen_gh.gh_gl, ip); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&ip->i_iopen_gh); } gfs2_holder_uninit(&ip->i_iopen_gh); } - if (gfs2_holder_initialized(&gh)) + if (gfs2_holder_initialized(&gh)) { + glock_clear_object(ip->i_gl, ip); gfs2_glock_dq_uninit(&gh); + } if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: @@ -1640,15 +1693,19 @@ out: gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); - glock_set_object(ip->i_gl, NULL); + glock_clear_object(ip->i_gl, ip); wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); gfs2_glock_add_to_lru(ip->i_gl); - gfs2_glock_put(ip->i_gl); + gfs2_glock_put_eventually(ip->i_gl); ip->i_gl = NULL; if (gfs2_holder_initialized(&ip->i_iopen_gh)) { - glock_set_object(ip->i_iopen_gh.gh_gl, NULL); + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + + glock_clear_object(gl, ip); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_hold(gl); gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_glock_put_eventually(gl); } } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index c81295f407f6..3926f95a6eb7 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -151,6 +151,7 @@ extern struct kmem_cache *gfs2_rgrpd_cachep; extern struct kmem_cache *gfs2_quotad_cachep; extern struct kmem_cache *gfs2_qadata_cachep; extern mempool_t *gfs2_page_pool; +extern struct workqueue_struct *gfs2_control_wq; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p) diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 54179554c7d2..ea09e41dbb49 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -25,6 +25,7 @@ #include "meta_io.h" #include "quota.h" #include "rgrp.h" +#include "super.h" #include "trans.h" #include "util.h" @@ -1209,8 +1210,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, if (namel > GFS2_EA_MAX_NAME_LEN) return -ERANGE; - if (value == NULL) - return gfs2_xattr_remove(ip, type, name); + if (value == NULL) { + error = gfs2_xattr_remove(ip, type, name); + if (error == -ENODATA && !(flags & XATTR_REPLACE)) + error = 0; + return error; + } if (ea_check_size(sdp, namel, size)) return -ERANGE;