From 196328ec973a74ee52cc282824e72c3824dc1cf5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 14:58:07 +1100 Subject: [PATCH 1/8] xfs: handle errors from ->free_blocks in xfs_btree_kill_iroot Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index a0eb18ce3ad3..3143577930bd 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -3209,6 +3209,7 @@ xfs_btree_kill_iroot( int level; int index; int numrecs; + int error; #ifdef DEBUG union xfs_btree_ptr ptr; int i; @@ -3272,8 +3273,6 @@ xfs_btree_kill_iroot( cpp = xfs_btree_ptr_addr(cur, 1, cblock); #ifdef DEBUG for (i = 0; i < numrecs; i++) { - int error; - error = xfs_btree_check_ptr(cur, cpp, i, level - 1); if (error) { XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); @@ -3283,7 +3282,11 @@ xfs_btree_kill_iroot( #endif xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); - cur->bc_ops->free_block(cur, cbp); + error = cur->bc_ops->free_block(cur, cbp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } XFS_BTREE_STATS_INC(cur, free); cur->bc_bufs[level - 1] = NULL; From c46ee8ad7856b58eeb383c30ce847897f85c4103 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 14:58:07 +1100 Subject: [PATCH 2/8] xfs: factor btree block freeing into a helper Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 3143577930bd..77afb4a899b9 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -294,6 +294,19 @@ xfs_btree_sblock_verify_crc( return true; } +static int +xfs_btree_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + int 
error; + + error = cur->bc_ops->free_block(cur, bp); + if (!error) + XFS_BTREE_STATS_INC(cur, free); + return error; +} + /* * Delete the btree cursor. */ @@ -3282,12 +3295,11 @@ xfs_btree_kill_iroot( #endif xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); - error = cur->bc_ops->free_block(cur, cbp); + error = xfs_btree_free_block(cur, cbp); if (error) { XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } - XFS_BTREE_STATS_INC(cur, free); cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); @@ -3320,14 +3332,12 @@ xfs_btree_kill_root( */ cur->bc_ops->set_root(cur, newroot, -1); - error = cur->bc_ops->free_block(cur, bp); + error = xfs_btree_free_block(cur, bp); if (error) { XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } - XFS_BTREE_STATS_INC(cur, free); - cur->bc_bufs[level] = NULL; cur->bc_ra[level] = 0; cur->bc_nlevels--; @@ -3833,10 +3843,9 @@ xfs_btree_delrec( } /* Free the deleted block. */ - error = cur->bc_ops->free_block(cur, rbp); + error = xfs_btree_free_block(cur, rbp); if (error) goto error0; - XFS_BTREE_STATS_INC(cur, free); /* * If we joined with the left neighbor, set the buffer in the From edfd9dd549212a0923c9b5b142275dc88912abfa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 14:58:07 +1100 Subject: [PATCH 3/8] xfs: move buffer invalidation to xfs_btree_free_block ... instead of leaving it in the methods. 
Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc_btree.c | 2 -- fs/xfs/libxfs/xfs_bmap_btree.c | 1 - fs/xfs/libxfs/xfs_btree.c | 4 +++- fs/xfs/libxfs/xfs_ialloc_btree.c | 12 ++---------- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 444626ddbd1b..d9b42425291e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -118,8 +118,6 @@ xfs_allocbt_free_block( xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); - - xfs_trans_binval(cur->bc_tp, bp); return 0; } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 1637c37bfbaa..e37508ae589b 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -531,7 +531,6 @@ xfs_bmbt_free_block( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(tp, bp); return 0; } diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 77afb4a899b9..1f88e1ce770f 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -302,8 +302,10 @@ xfs_btree_free_block( int error; error = cur->bc_ops->free_block(cur, bp); - if (!error) + if (!error) { + xfs_trans_binval(cur->bc_tp, bp); XFS_BTREE_STATS_INC(cur, free); + } return error; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index c679f3c05b63..89c21d771e35 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -125,16 +125,8 @@ xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - xfs_fsblock_t fsbno; - int error; - - fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); - error = xfs_free_extent(cur->bc_tp, fsbno, 1); - if (error) - return error; - - xfs_trans_binval(cur->bc_tp, 
bp); - return error; + return xfs_free_extent(cur->bc_tp, + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); } STATIC int From de0b85a8cf24f8c535b7f719c454e3951298d17a Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 8 Feb 2016 14:58:07 +1100 Subject: [PATCH 4/8] xfs: remove unused function definitions Old leftovers. Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_sb.h | 1 - fs/xfs/xfs_fsops.h | 1 - fs/xfs/xfs_mount.h | 1 - 3 files changed, 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index b25bb9a343f3..961e6475a309 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, extern void xfs_perag_put(struct xfs_perag *pag); extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); -extern void xfs_sb_calc_crc(struct xfs_buf *bp); extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 1b6a98b66886..f32713f14f9a 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(struct xfs_mount *mp); #endif /* __XFS_FSOPS_H__ */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b57098481c10..a4e03ab50342 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -327,7 +327,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -extern int xfs_mount_log_sb(xfs_mount_t *); extern struct xfs_buf 
*xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); From 18f1df4e00cea2eae41f3e5515b94d1e7127b2b6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 Feb 2016 14:59:07 +1100 Subject: [PATCH 5/8] xfs: Make xfsaild freezeable again Hendrik has reported suspend failures due to xfsaild preventing the freezer from settling down. Jan 17 19:59:56 linux-6380 kernel: PM: Syncing filesystems ... done. Jan 17 19:59:56 linux-6380 kernel: PM: Preparing system for sleep (mem) Jan 17 19:59:56 linux-6380 kernel: Freezing user space processes ... (elapsed 0.001 seconds) done. Jan 17 19:59:56 linux-6380 kernel: Freezing remaining freezable tasks ... Jan 17 19:59:56 linux-6380 kernel: Freezing of tasks failed after 20.002 seconds (1 tasks refusing to freeze, wq_busy=0): Jan 17 19:59:56 linux-6380 kernel: xfsaild/dm-5 S 00000000 0 1293 2 0x00000080 Jan 17 19:59:56 linux-6380 kernel: f0ef5f00 00000046 00000200 00000000 ffff9022 c02d3800 00000000 00000032 Jan 17 19:59:56 linux-6380 kernel: ee0b2400 00000032 f71e0d00 f36fabc0 f0ef2d00 f0ef6000 f0ef2d00 f12f90c0 Jan 17 19:59:56 linux-6380 kernel: f0ef5f0c c0844e44 00000000 f0ef5f6c f811e0be 00000000 00000000 f0ef2d00 Jan 17 19:59:56 linux-6380 kernel: Call Trace: Jan 17 19:59:56 linux-6380 kernel: [] schedule+0x34/0x90 Jan 17 19:59:56 linux-6380 kernel: [] xfsaild+0x5de/0x600 [xfs] Jan 17 19:59:56 linux-6380 kernel: [] kthread+0x9b/0xb0 Jan 17 19:59:56 linux-6380 kernel: [] ret_from_kernel_thread+0x21/0x38 The issue has been there for quite some time but it has been made visible only by 24ba16bb3d49 ("xfs: clear PF_NOFREEZE for xfsaild kthread") because the suspend started seeing xfsaild. The above commit has missed that the !xfs_ail_min branch might call schedule with TASK_INTERRUPTIBLE without calling try_to_freeze so the pm suspend would wake up the kernel thread over and over again without any progress. 
What we want here is to use freezable_schedule instead to hide the thread from the suspend. While we are here also change schedule_timeout to its freezable variant to prevent spurious wakeups by suspend. [dchinner: re-add set_freezable() call so the freezer will account properly for this kthread.] Reported-by: Hendrik Woltersdorf Signed-off-by: Michal Hocko Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_trans_ail.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 4f18fd92ca13..d6c9c3e9e02b 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -497,6 +497,7 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; + set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) @@ -519,14 +520,14 @@ xfsaild( if (!xfs_ail_min(ailp) && ailp->xa_target == ailp->xa_target_prev) { spin_unlock(&ailp->xa_lock); - schedule(); + freezable_schedule(); tout = 0; continue; } spin_unlock(&ailp->xa_lock); if (tout) - schedule_timeout(msecs_to_jiffies(tout)); + freezable_schedule_timeout(msecs_to_jiffies(tout)); __set_current_state(TASK_RUNNING); From 244efeafb65ad4d98cd0c9463631e3931d813a6e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Feb 2016 15:00:01 +1100 Subject: [PATCH 6/8] xfs: move struct xfs_attr_shortform to xfs_da_format.h Move the shortform attr structure definition to the same place as the other attribute structure definitions for consistency and also so that xfs/122 verifies the structure size. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr_sf.h | 16 ---------------- fs/xfs/libxfs/xfs_da_format.h | 16 ++++++++++++++++ fs/xfs/libxfs/xfs_inode_fork.c | 1 + 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 919756e3ba53..90928bbe693c 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -24,22 +24,6 @@ * Small attribute lists are packed as tightly as possible so as * to fit into the literal area of the inode. */ - -/* - * Entries are packed toward the top as tight as possible. - */ -typedef struct xfs_attr_shortform { - struct xfs_attr_sf_hdr { /* constant-structure header block */ - __be16 totsize; /* total bytes in shortform list */ - __u8 count; /* count of active entries */ - } hdr; - struct xfs_attr_sf_entry { - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t valuelen; /* actual length of value (no NULL) */ - __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - __uint8_t nameval[1]; /* name & value bytes concatenated */ - } list[1]; /* variable sized array */ -} xfs_attr_shortform_t; typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index b14bbd6bb05f..8d4d8bce41bf 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) */ #define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ +/* + * Entries are packed toward the top as tight as possible. 
+ */ +typedef struct xfs_attr_shortform { + struct xfs_attr_sf_hdr { /* constant-structure header block */ + __be16 totsize; /* total bytes in shortform list */ + __u8 count; /* count of active entries */ + } hdr; + struct xfs_attr_sf_entry { + __uint8_t namelen; /* actual length of name (no NULL) */ + __uint8_t valuelen; /* actual length of value (no NULL) */ + __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ + __uint8_t nameval[1]; /* name & value bytes concatenated */ + } list[1]; /* variable sized array */ +} xfs_attr_shortform_t; + typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ __be16 base; /* base of free region */ __be16 size; /* length of free region */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 0defbd02f62d..ef22a78fb569 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -31,6 +31,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_attr_sf.h" +#include "xfs_da_format.h" kmem_zone_t *xfs_ifork_zone; From 60630fe66ed28d43379382645ed349f7d3457330 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Feb 2016 15:00:02 +1100 Subject: [PATCH 7/8] xfs: clean up unwritten buffers on write failure The xfs_vm_write_failed() handler is currently responsible for cleaning up any delalloc blocks over the range of a failed write beyond EOF. Failure to do so results in warning messages and other inconsistencies between buffer and extent state. The ->releasepage() handler currently warns in the event of a page being released with either unwritten or delalloc buffers, as neither is ever expected by the time a page is released. As has been reproduced by generic/083 on a -bsize=1k fs, it is currently possible to trigger the ->releasepage() warning for a page with unwritten buffers when a filesystem is near ENOSPC. 
This is reproduced by the following sequence: $ mkfs.xfs -f -b size=1k -d size=100m <dev> $ mount <dev> /mnt/ $ $ xfs_io -fc "falloc -k 0 1k" /mnt/file $ dd if=/dev/zero of=/mnt/enospc conv=notrunc oflag=append $ $ xfs_io -c "pwrite 512 1k" /mnt/file $ xfs_io -d -c "pwrite 16k 1k" /mnt/file The first pwrite command attempts a block-unaligned write across an unwritten block and a hole. The delalloc for the hole fails with ENOSPC and the subsequent error handling does not clean up the unwritten buffer that was instantiated during the first ->get_block() call. The second pwrite triggers a warning as part of the inode mapping invalidation that occurs prior to direct I/O. The releasepage() handler detects the unwritten buffer at this time, warns and prevents the release of the page. To deal with this problem, update xfs_vm_write_failed() to clean up unwritten as well as delalloc buffers that are beyond EOF and within the range of the failed write. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 379c089fb051..4a13f5311a41 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1783,14 +1783,22 @@ xfs_vm_write_failed( if (block_start >= to) break; - if (!buffer_delay(bh)) + /* + * Process delalloc and unwritten buffers beyond EOF. We can + * encounter unwritten buffers in the event that a file has + * post-EOF unwritten extents and an extending write happens to + * fail (e.g., an unaligned write that also involves a delalloc + * to the same page). 
+ */ + if (!buffer_delay(bh) && !buffer_unwritten(bh)) continue; if (!buffer_new(bh) && block_offset < i_size_read(inode)) continue; - xfs_vm_kill_delalloc_range(inode, block_offset, - block_offset + bh->b_size); + if (buffer_delay(bh)) + xfs_vm_kill_delalloc_range(inode, block_offset, + block_offset + bh->b_size); /* * This buffer does not contain data anymore. make sure anyone @@ -1801,6 +1809,7 @@ xfs_vm_write_failed( clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_dirty(bh); + clear_buffer_unwritten(bh); } } From af055e37a91d215d7174d0b84c86795ca81086a7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 8 Feb 2016 15:00:02 +1100 Subject: [PATCH 8/8] xfs: fix xfs_log_ticket leak in xfs_end_io() after fs shutdown If the filesystem has shut down, xfs_end_io() currently sets an error on the ioend and proceeds to ioend destruction. The ioend might contain a truncate transaction if the I/O extended the size of the file. This transaction is only cleaned up in xfs_setfilesize_ioend(), however, which is skipped in this case. This results in an xfs_log_ticket leak message when the associated cache slab is destroyed (e.g., on rmmod). This was originally reproduced by xfs/141 on a distro kernel. The problem is reproducible on an upstream kernel, but not easily detected in current upstream if the xfs_log_ticket cache happens to be merged with another cache. This can be reproduced more deterministically with the 'slab_nomerge' kernel boot option. Update xfs_end_io() to proceed with normal end I/O processing after an error is set on an ioend due to fs shutdown. The I/O type-based processing is already designed to handle an I/O error and ensure that the ioend is cleaned up correctly. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4a13f5311a41..0c8dacea411e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -214,10 +214,12 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); int error = 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + /* + * Set an error if the mount has shut down and proceed with end I/O + * processing so it can perform whatever cleanups are necessary. + */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) ioend->io_error = -EIO; - goto done; - } /* * For unwritten extents we need to issue transactions to convert a