1
0
Fork 0

xfs: fixes for 4.19-rc7

Update for 4.19-rc7 to fix numerous file clone and deduplication issues.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJbvo1jAAoJEK3oKUf0dfodurwP/3OGvJo11XjE5Wuh5tbcSOUm
 duJdJvN+b0gx8Lb1IL2lHaBEt/Pw3PKi+KPx6d+pd47aflCYNMiU7I1kzejxvq9I
 TBVdhxffAmNBU02VU/qmhucMnKfpVr/UX19f0sHZcfXbZkpIuHrSpI25MNSYqbEs
 bRoMDkgbuu377hCJu6tnBAUm38z4iZdfJaGPVmJmuVMn8JJE8KA3Une/daxg0/HY
 zbCMWP3SGJwqx7SyyeurQSkRS/8GG3LgbnYOz0FlaHfBd4JVJscHOZU08nrYmMAN
 9SOowvahaPkDcyO8c6gRkyE6NIbOsb79718g+HTm4eqzyChnh+jnFTzitcTMuK2c
 vjblXZSDKnN/P4UaEEzIAnQ1Eew4WhLrKuBr+3fCr2bKTGfj0iiX36CjEgk1k+Df
 t1DV/Hj6me6crZpWO7GQZVLnDsiJBzaOsIgpYnB3vcJyof/cgm+5bfGFedDZFdpV
 XTh0oBN8B7oQztd4MvcfQOGXSktrMtq+6RomO8kv77mVtHFnjnroKHq/wIft2nyI
 rs7u7DFmCLyB9Lbm8aoeMPo4+0zxTp9385HSJ+XznHcjGXxkxIIGhhdU3omKxANa
 j3UQB1+VRC4lHSluUXOW7vH0j1tX3gaS3z/yJ+gfcAdizUmdp0h0rMx8c9VR8SEJ
 xv/fpbrcKHyBwuNvKtBm
 =yj1P
 -----END PGP SIGNATURE-----

Merge tag 'xfs-fixes-for-4.19-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Dave writes:
  "xfs: fixes for 4.19-rc7

   Update for 4.19-rc7 to fix numerous file clone and deduplication issues."

* tag 'xfs-fixes-for-4.19-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix data corruption w/ unaligned reflink ranges
  xfs: fix data corruption w/ unaligned dedupe ranges
  xfs: update ctime and remove suid before cloning files
  xfs: zero posteof blocks when cloning above eof
  xfs: refactor clonerange preparation into a separate helper
hifive-unleashed-5.1
Greg Kroah-Hartman 2018-10-11 07:17:42 +02:00
commit 4718dcad7d
1 changed files with 165 additions and 35 deletions

View File

@ -1220,35 +1220,92 @@ retry:
return 0;
}
/* Unlock both inodes after they've been prepped for a range clone. */
STATIC void
xfs_reflink_remap_unlock(
struct file *file_in,
struct file *file_out)
{
struct inode *inode_in = file_inode(file_in);
struct xfs_inode *src = XFS_I(inode_in);
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
bool same_inode = (inode_in == inode_out);
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
inode_unlock(inode_out);
if (!same_inode)
inode_unlock_shared(inode_in);
}
/*
* Link a range of blocks from one file to another.
* If we're reflinking to a point past the destination file's EOF, we must
* zero any speculative post-EOF preallocations that sit between the old EOF
* and the destination file offset.
*/
int
xfs_reflink_remap_range(
static int
xfs_reflink_zero_posteof(
struct xfs_inode *ip,
loff_t pos)
{
loff_t isize = i_size_read(VFS_I(ip));
if (pos <= isize)
return 0;
trace_xfs_zero_eof(ip, isize, pos - isize);
return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
&xfs_iomap_ops);
}
/*
* Prepare two files for range cloning. Upon a successful return both inodes
* will have the iolock and mmaplock held, the page cache of the out file will
* be truncated, and any leases on the out file will have been broken. This
* function borrows heavily from xfs_file_aio_write_checks.
*
* The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't
* checked that the bytes beyond EOF physically match. Hence we cannot use the
* EOF block in the source dedupe range because it's not a complete block match,
* hence can introduce a corruption into the file that has it's block replaced.
*
* In similar fashion, the VFS file cloning also allows partial EOF blocks to be
* "block aligned" for the purposes of cloning entire files. However, if the
* source file range includes the EOF block and it lands within the existing EOF
* of the destination file, then we can expose stale data from beyond the source
* file EOF in the destination file.
*
* XFS doesn't support partial block sharing, so in both cases we have check
* these cases ourselves. For dedupe, we can simply round the length to dedupe
* down to the previous whole block and ignore the partial EOF block. While this
* means we can't dedupe the last block of a file, this is an acceptible
* tradeoff for simplicity on implementation.
*
* For cloning, we want to share the partial EOF block if it is also the new EOF
* block of the destination file. If the partial EOF block lies inside the
* existing destination EOF, then we have to abort the clone to avoid exposing
* stale data in the destination file. Hence we reject these clone attempts with
* -EINVAL in this case.
*/
STATIC int
xfs_reflink_remap_prep(
struct file *file_in,
loff_t pos_in,
struct file *file_out,
loff_t pos_out,
u64 len,
u64 *len,
bool is_dedupe)
{
struct inode *inode_in = file_inode(file_in);
struct xfs_inode *src = XFS_I(inode_in);
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
xfs_extlen_t cowextsize;
u64 blkmask = i_blocksize(inode_in) - 1;
ssize_t ret;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
/* Lock both files against IO */
ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
if (ret)
@ -1270,33 +1327,115 @@ xfs_reflink_remap_range(
goto out_unlock;
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
len, is_dedupe);
if (ret <= 0)
goto out_unlock;
/*
* If the dedupe data matches, chop off the partial EOF block
* from the source file so we don't try to dedupe the partial
* EOF block.
*/
if (is_dedupe) {
*len &= ~blkmask;
} else if (*len & blkmask) {
/*
* The user is attempting to share a partial EOF block,
* if it's inside the destination EOF then reject it.
*/
if (pos_out + *len < i_size_read(inode_out)) {
ret = -EINVAL;
goto out_unlock;
}
}
/* Attach dquots to dest inode before changing block map */
ret = xfs_qm_dqattach(dest);
if (ret)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
/*
* Clear out post-eof preallocations because we don't have page cache
* backing the delayed allocations and they'll never get freed on
* their own.
* Zero existing post-eof speculative preallocations in the destination
* file.
*/
if (xfs_can_free_eofblocks(dest, true)) {
ret = xfs_free_eofblocks(dest);
if (ret)
goto out_unlock;
}
ret = xfs_reflink_zero_posteof(dest, pos_out);
if (ret)
goto out_unlock;
/* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
goto out_unlock;
/* Zap any page cache for the destination file's range. */
truncate_inode_pages_range(&inode_out->i_data, pos_out,
PAGE_ALIGN(pos_out + *len) - 1);
/* If we're altering the file contents... */
if (!is_dedupe) {
/*
* ...update the timestamps (which will grab the ilock again
* from xfs_fs_dirty_inode, so we have to call it before we
* take the ilock).
*/
if (!(file_out->f_mode & FMODE_NOCMTIME)) {
ret = file_update_time(file_out);
if (ret)
goto out_unlock;
}
/*
* ...clear the security bits if the process is not being run
* by root. This keeps people from modifying setuid and setgid
* binaries.
*/
ret = file_remove_privs(file_out);
if (ret)
goto out_unlock;
}
return 1;
out_unlock:
xfs_reflink_remap_unlock(file_in, file_out);
return ret;
}
/*
* Link a range of blocks from one file to another.
*/
int
xfs_reflink_remap_range(
struct file *file_in,
loff_t pos_in,
struct file *file_out,
loff_t pos_out,
u64 len,
bool is_dedupe)
{
struct inode *inode_in = file_inode(file_in);
struct xfs_inode *src = XFS_I(inode_in);
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
xfs_extlen_t cowextsize;
ssize_t ret;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
/* Prepare and then clone file data. */
ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
&len, is_dedupe);
if (ret <= 0)
return ret;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
dfsbno = XFS_B_TO_FSBT(mp, pos_out);
sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
@ -1305,10 +1444,6 @@ xfs_reflink_remap_range(
if (ret)
goto out_unlock;
/* Zap any page cache for the destination file's range. */
truncate_inode_pages_range(&inode_out->i_data, pos_out,
PAGE_ALIGN(pos_out + len) - 1);
/*
* Carry the cowextsize hint from src to dest if we're sharing the
* entire source file to the entire destination file, the source file
@ -1325,12 +1460,7 @@ xfs_reflink_remap_range(
is_dedupe);
out_unlock:
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
inode_unlock(inode_out);
if (!same_inode)
inode_unlock_shared(inode_in);
xfs_reflink_remap_unlock(file_in, file_out);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;