From 6fe7b9901400152238e1b76198747f6716c78aad Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Thu, 19 Sep 2019 15:32:44 -0700 Subject: [PATCH 1/2] iomap: split size and error for iomap_dio_rw ->end_io Modify the calling convention for the iomap_dio_rw ->end_io() callback. Rather than passing either dio->error or dio->size as the 'size' argument, instead pass both the dio->error and the dio->size value separately. In the instance that an error occurred during a write, we currently cannot determine whether any blocks have been allocated beyond the current EOF and data has subsequently been written to these blocks within the ->end_io() callback. As a result, we cannot judge whether we should take the truncate failed write path. Having both dio->error and dio->size will allow us to perform such checks within this callback. Signed-off-by: Matthew Bobrowski [hch: minor cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Matthew Wilcox (Oracle) --- fs/iomap/direct-io.c | 9 +++------ fs/xfs/xfs_file.c | 8 +++++--- include/linux/iomap.h | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 10517cea9682..2ccf1c6460d4 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -77,13 +77,10 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) loff_t offset = iocb->ki_pos; ssize_t ret; - if (dio->end_io) { - ret = dio->end_io(iocb, - dio->error ? dio->error : dio->size, - dio->flags); - } else { + if (dio->end_io) + ret = dio->end_io(iocb, dio->size, dio->error, dio->flags); + else ret = dio->error; - } if (likely(!ret)) { ret = dio->size; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 28101bbc0b78..74411296f6b5 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -369,21 +369,23 @@ static int xfs_dio_write_end_io( struct kiocb *iocb, ssize_t size, + int error, unsigned flags) { struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); loff_t offset = iocb->ki_pos; unsigned int nofs_flag; - int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (size <= 0) - return size; + if (error) + return error; + if (!size) + return 0; /* * Capture amount written on completion as we can't reliably account diff --git a/include/linux/iomap.h b/include/linux/iomap.h index bc499ceae392..50bb746d2216 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -188,8 +188,8 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, */ #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ -typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, - unsigned flags); +typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error, + unsigned int flags); ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, iomap_dio_end_io_t end_io); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); From 838c4f3d7515efe9d0e32c846fb5d102b6d8a29d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 Sep 2019 15:32:45 -0700 Subject: [PATCH 2/2] iomap: move the iomap_dio_rw ->end_io callback into a structure Add a new iomap_dio_ops structure that for now just contains the end_io handler. This avoid storing the function pointer in a mutable structure, which is a possible exploit vector for kernel code execution, and prepares for adding a submit_io handler that btrfs needs. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap/direct-io.c | 21 ++++++++++----------- fs/xfs/xfs_file.c | 6 +++++- include/linux/iomap.h | 10 +++++++--- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 2ccf1c6460d4..1fc28c2da279 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -24,7 +24,7 @@ struct iomap_dio { struct kiocb *iocb; - iomap_dio_end_io_t *end_io; + const struct iomap_dio_ops *dops; loff_t i_size; loff_t size; atomic_t ref; @@ -72,15 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, static ssize_t iomap_dio_complete(struct iomap_dio *dio) { + const struct iomap_dio_ops *dops = dio->dops; struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); loff_t offset = iocb->ki_pos; - ssize_t ret; + ssize_t ret = dio->error; - if (dio->end_io) - ret = dio->end_io(iocb, dio->size, dio->error, dio->flags); - else - ret = dio->error; + if (dops && dops->end_io) + ret = dops->end_io(iocb, dio->size, ret, dio->flags); if (likely(!ret)) { ret = dio->size; @@ -98,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... * - * And this page cache invalidation has to be after dio->end_io(), as - * some filesystems convert unwritten extents to real allocations in - * end_io() when necessary, otherwise a racing buffer read would cache + * And this page cache invalidation has to be after ->end_io(), as some + * filesystems convert unwritten extents to real allocations in + * ->end_io() when necessary, otherwise a racing buffer read would cache * zeros from unwritten extents. */ if (!dio->error && @@ -393,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, */ ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, iomap_dio_end_io_t end_io) + const struct iomap_ops *ops, const struct iomap_dio_ops *dops) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); @@ -418,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, atomic_set(&dio->ref, 1); dio->size = 0; dio->i_size = i_size_read(inode); - dio->end_io = end_io; + dio->dops = dops; dio->error = 0; dio->flags = 0; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 74411296f6b5..21bd3d575aaa 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -442,6 +442,10 @@ out: return error; } +static const struct iomap_dio_ops xfs_dio_write_ops = { + .end_io = xfs_dio_write_end_io, +}; + /* * xfs_file_dio_aio_write - handle direct IO writes * @@ -542,7 +546,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos); - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); + ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops); /* * If unaligned, this is the only IO in-flight. If it has not yet diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 50bb746d2216..7aa5d6117936 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, */ #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ -typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error, - unsigned int flags); + +struct iomap_dio_ops { + int (*end_io)(struct kiocb *iocb, ssize_t size, int error, + unsigned flags); +}; + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, iomap_dio_end_io_t end_io); + const struct iomap_ops *ops, const struct iomap_dio_ops *dops); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP