ext4: move inode extension/truncate code out from ->iomap_end() callback

In preparation for implementing the iomap direct I/O modifications,
the inode extension/truncate code needs to be moved out from the
ext4_iomap_end() callback. For direct I/O, if the current code
remained, it would behave incorrectly. Updating the inode size prior
to converting unwritten extents would potentially allow a racing
direct I/O read to find unwritten extents before being converted
correctly.

The inode extension/truncate code now resides within a new helper
ext4_handle_inode_extension(). This function has been designed so that
it can accommodate both DAX and direct I/O extension/truncate
operations.

Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/d41ffa26e20b15b12895812c3cad7c91a6a59bc6.1572949325.git.mbobrowski@mbobrowski.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
Matthew Bobrowski 2019-11-05 23:01:51 +11:00 committed by Theodore Ts'o
parent b1b4705d54
commit 569342dc24
2 changed files with 89 additions and 48 deletions

View file

@ -33,6 +33,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "truncate.h"
static bool ext4_dio_supported(struct inode *inode)
{
@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
return iov_iter_count(from);
}
/*
 * ext4_handle_inode_extension - post-I/O inode size/orphan/truncate handling
 * @inode:   inode the write was issued against
 * @offset:  file position at which the write started
 * @written: number of bytes written, or a negative error from the I/O
 * @count:   number of bytes the write intended to transfer
 *
 * If the intended range [offset, offset + count) lies within i_disksize, only
 * a prematurely added orphan list entry is removed.  Otherwise the inode size
 * is updated to offset + written, and if the block-aligned end of the written
 * data is below the block-aligned end of the intended range, the blocks past
 * the written data are truncated via ext4_truncate_failed_write().
 *
 * Returns @written unchanged, or the error from ext4_journal_start() if a
 * journal handle could not be started.
 */
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
					   ssize_t written, size_t count)
{
	handle_t *handle;
	bool need_truncate = false;
	u8 blkbits = inode->i_blkbits;
	/*
	 * These hold block-aligned *byte* offsets, not logical block numbers,
	 * so they must be as wide as loff_t; a 32-bit ext4_lblk_t would be
	 * truncated for positions beyond 4GiB and break the comparison below.
	 */
	loff_t written_end, intended_end;

	/*
	 * Note that EXT4_I(inode)->i_disksize can get extended up to
	 * inode->i_size while the I/O was running due to writeback of delalloc
	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
	 * zeroed/unwritten extents if this is possible; thus we won't leave
	 * uninitialized blocks in a file even if we didn't succeed in writing
	 * as much as we intended.
	 */
	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
	if (offset + count <= EXT4_I(inode)->i_disksize) {
		/*
		 * We need to ensure that the inode is removed from the orphan
		 * list if it has been added prematurely, due to writeback of
		 * delalloc blocks.
		 */
		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

			if (IS_ERR(handle)) {
				/* No handle: drop the in-memory entry only. */
				ext4_orphan_del(NULL, inode);
				return PTR_ERR(handle);
			}

			ext4_orphan_del(handle, inode);
			ext4_journal_stop(handle);
		}

		return written;
	}

	/* The I/O itself failed: nothing to extend, just clean up. */
	if (written < 0)
		goto truncate;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (IS_ERR(handle)) {
		written = PTR_ERR(handle);
		goto truncate;
	}

	if (ext4_update_inode_size(inode, offset + written))
		ext4_mark_inode_dirty(handle, inode);

	/*
	 * We may need to truncate allocated but not written blocks beyond EOF.
	 */
	written_end = ALIGN(offset + written, 1 << blkbits);
	intended_end = ALIGN(offset + count, 1 << blkbits);
	if (written_end < intended_end && ext4_can_truncate(inode))
		need_truncate = true;

	/*
	 * Remove the inode from the orphan list if it has been extended and
	 * everything went OK.
	 */
	if (!need_truncate && inode->i_nlink)
		ext4_orphan_del(handle, inode);
	ext4_journal_stop(handle);

	if (!need_truncate)
		return written;

truncate:
	ext4_truncate_failed_write(inode);
	/*
	 * If the truncate operation failed early, then the inode may
	 * still be on the orphan list. In that case, we need to try
	 * remove the inode from the in-memory linked list.
	 */
	if (inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	return written;
}
#ifdef CONFIG_FS_DAX
static ssize_t
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
size_t count;
loff_t offset;
struct inode *inode = file_inode(iocb->ki_filp);
if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
@ -256,7 +340,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret)
goto out;
offset = iocb->ki_pos;
count = iov_iter_count(from);
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
inode_unlock(inode);
if (ret > 0)

View file

@ -3583,53 +3583,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
/*
 * ext4_iomap_end - the ->iomap_end() callback referred to by the commit
 * message above.
 *
 * NOTE(review): this hunk appears to be shown with its diff +/- markers
 * stripped.  Everything from "int ret = 0;" down to "return ret;" looks like
 * the pre-patch body (the 46 removed lines in the hunk header), and the
 * trailing "return 0;" -- unreachable as rendered here -- looks like the whole
 * post-patch body.  Confirm against the real tree before editing.
 *
 * As written, the body below: does nothing for non-write or fault requests,
 * otherwise updates the inode size to offset + written, decides whether
 * blocks past the written range must be truncated, and removes the inode from
 * the orphan list.
 */
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap)
{
int ret = 0;
handle_t *handle;
int blkbits = inode->i_blkbits;
bool truncate = false;
/* Only writes that do not have IOMAP_FAULT set need any post-processing. */
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
/* No journal handle: report the error and fall back to orphan cleanup. */
ret = PTR_ERR(handle);
goto orphan_del;
}
if (ext4_update_inode_size(inode, offset + written))
ext4_mark_inode_dirty(handle, inode);
/*
 * We may need to truncate allocated but not written blocks beyond EOF.
 */
if (iomap->offset + iomap->length >
ALIGN(inode->i_size, 1 << blkbits)) {
ext4_lblk_t written_blk, end_blk;
/* Here the values really are block numbers (byte offset >> blkbits). */
written_blk = (offset + written) >> blkbits;
end_blk = (offset + length) >> blkbits;
if (written_blk < end_blk && ext4_can_truncate(inode))
truncate = true;
}
/*
 * Remove inode from orphan list if we were extending an inode and
 * everything went fine.
 */
if (!truncate && inode->i_nlink &&
!list_empty(&EXT4_I(inode)->i_orphan))
ext4_orphan_del(handle, inode);
ext4_journal_stop(handle);
if (truncate) {
ext4_truncate_failed_write(inode);
/* The journal-start failure path jumps here, skipping the truncate. */
orphan_del:
/*
 * If truncate failed early the inode might still be on the
 * orphan list; we need to make sure the inode is removed from
 * the orphan list in that case.
 */
if (inode->i_nlink)
ext4_orphan_del(NULL, inode);
}
return ret;
return 0;
}
const struct iomap_ops ext4_iomap_ops = {