From 2ba66237029d1ad6c1a5e2241b0ffbbfff55f750 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 10:02:09 +1100 Subject: [PATCH] xfs: don't allocate an ioend for direct I/O completions Back in the days when the direct I/O ->end_io callback could be called from interrupt context for AIO we needed a structure to hand off to the workqueue, and reused the ioend structure for this purpose. These days ->end_io is always called from user or workqueue context, which allows us to avoid this memory allocation and simplify the code significantly. [dchinner: removed now unused xfs_finish_ioend_sync() function after Brian Foster did an initial review. ] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 147 +++++++++++++++++++--------------------------- fs/xfs/xfs_aops.h | 3 - 2 files changed, 60 insertions(+), 90 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 18e2f3bbae5e..3a9b7a1b8704 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( */ STATIC int xfs_setfilesize( - struct xfs_ioend *ioend) + struct xfs_inode *ip, + struct xfs_trans *tp, + xfs_off_t offset, + size_t size) { - struct xfs_inode *ip = XFS_I(ioend->io_inode); - struct xfs_trans *tp = ioend->io_append_trans; xfs_fsize_t isize; - /* - * The transaction may have been allocated in the I/O submission thread, - * thus we need to mark ourselves as beeing in a transaction manually. - * Similarly for freeze protection. - */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); - xfs_ilock(ip, XFS_ILOCK_EXCL); - isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); + isize = xfs_new_eof(ip, offset + size); if (!isize) { xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, 0); return 0; } - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + trace_xfs_setfilesize(ip, offset, size); ip->i_d.di_size = isize; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -167,6 +159,25 @@ xfs_setfilesize( return xfs_trans_commit(tp, 0); } +STATIC int +xfs_setfilesize_ioend( + struct xfs_ioend *ioend) +{ + struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_trans *tp = ioend->io_append_trans; + + /* + * The transaction may have been allocated in the I/O submission thread, + * thus we need to mark ourselves as being in a transaction manually. + * Similarly for freeze protection. + */ + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); + + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); +} + /* * Schedule IO completion handling on the final put of an ioend. * @@ -182,8 +193,7 @@ xfs_finish_ioend( if (ioend->io_type == XFS_IO_UNWRITTEN) queue_work(mp->m_unwritten_workqueue, &ioend->io_work); - else if (ioend->io_append_trans || - (ioend->io_isdirect && xfs_ioend_is_append(ioend))) + else if (ioend->io_append_trans) queue_work(mp->m_data_workqueue, &ioend->io_work); else xfs_destroy_ioend(ioend); @@ -215,22 +225,8 @@ xfs_end_io( if (ioend->io_type == XFS_IO_UNWRITTEN) { error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); - } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { - /* - * For direct I/O we do not know if we need to allocate blocks - * or not so we can't preallocate an append transaction as that - * results in nested reservations and log space deadlocks. Hence - * allocate the transaction here. While this is sub-optimal and - * can block IO completion for some time, we're stuck with doing - * it this way until we can pass the ioend to the direct IO - * allocation callbacks and avoid nesting that way. - */ - error = xfs_setfilesize_trans_alloc(ioend); - if (error) - goto done; - error = xfs_setfilesize(ioend); } else if (ioend->io_append_trans) { - error = xfs_setfilesize(ioend); + error = xfs_setfilesize_ioend(ioend); } else { ASSERT(!xfs_ioend_is_append(ioend)); } @@ -241,17 +237,6 @@ done: xfs_destroy_ioend(ioend); } -/* - * Call IO completion handling in caller context on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend_sync( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) - xfs_end_io(&ioend->io_work); -} - /* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. @@ -273,7 +258,6 @@ xfs_alloc_ioend( * all the I/O from calling the completion routine too early. */ atomic_set(&ioend->io_remaining, 1); - ioend->io_isdirect = 0; ioend->io_error = 0; ioend->io_list = NULL; ioend->io_type = type; @@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( * * If the private argument is non-NULL __xfs_get_blocks signals us that we * need to issue a transaction to convert the range from unwritten to written - * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successful AIO - * request this handler is called from interrupt context, from which we - * can't start transactions. In that case offload the I/O completion to - * the workqueues we also use for buffered I/O completion. + * extents. */ STATIC void xfs_end_io_direct_write( @@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( ssize_t size, void *private) { - struct xfs_ioend *ioend = iocb->private; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_FORCED_SHUTDOWN(mp)) + return; /* * While the generic direct I/O code updates the inode size, it does @@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( * end_io handler thinks the on-disk size is outside the in-core * size. To prevent this just update it a little bit earlier here. */ - if (offset + size > i_size_read(ioend->io_inode)) - i_size_write(ioend->io_inode, offset + size); + if (offset + size > i_size_read(inode)) + i_size_write(inode, offset + size); /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. + * For direct I/O we do not know if we need to allocate blocks or not, + * so we can't preallocate an append transaction, as that results in + * nested reservations and log space deadlocks. Hence allocate the + * transaction here. While this is sub-optimal and can block IO + * completion for some time, we're stuck with doing it this way until + * we can pass the ioend to the direct IO allocation callbacks and + * avoid nesting that way. */ - iocb->private = NULL; + if (private && size > 0) { + xfs_iomap_write_unwritten(ip, offset, size); + } else if (offset + size > ip->i_d.di_size) { + struct xfs_trans *tp; + int error; - ioend->io_offset = offset; - ioend->io_size = size; - if (private && size > 0) - ioend->io_type = XFS_IO_UNWRITTEN; + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return; + } - xfs_finish_ioend_sync(ioend); + xfs_setfilesize(ip, tp, offset, size); + } } STATIC ssize_t @@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - struct xfs_ioend *ioend = NULL; - ssize_t ret; if (rw & WRITE) { - size_t size = iov_iter_count(iter); - - /* - * We cannot preallocate a size update transaction here as we - * don't know whether allocation is necessary or not. Hence we - * can only tell IO completion that one is necessary if we are - * not doing unwritten extent conversion. - */ - iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); - if (offset + size > XFS_I(inode)->i_d.di_size) - ioend->io_isdirect = 1; - - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); - if (ret != -EIOCBQUEUED && iocb->private) - goto out_destroy_ioend; - } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, - NULL, NULL, 0); } - - return ret; - -out_destroy_ioend: - xfs_destroy_ioend(ioend); - return ret; + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, + NULL, NULL, 0); } /* diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f94dd459dff9..ac644e0137a4 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; * Types of I/O for bmap clustering and I/O completion tracking. */ enum { - XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_OVERWRITE, /* covers already allocated extent */ }; #define XFS_IO_TYPES \ - { 0, "" }, \ { XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \ { XFS_IO_OVERWRITE, "overwrite" } @@ -45,7 +43,6 @@ typedef struct xfs_ioend { unsigned int io_type; /* delalloc / unwritten */ int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ - unsigned int io_isdirect : 1;/* direct I/O */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */