alistair23-linux/fs/nfsd/blocklayout.c
Linus Torvalds 7a932516f5 vfs/y2038: inode timestamps conversion to timespec64
This is a late set of changes from Deepa Dinamani doing an automated
 treewide conversion of the inode and iattr structures from 'timespec'
 to 'timespec64', to push the conversion from the VFS layer into the
 individual file systems.
 
 There were no conflicts between this and the contents of linux-next
 until just before the merge window, when we saw multiple problems:
 
 - A minor conflict with my own y2038 fixes, which I could address
   by adding another patch on top here.
 - One semantic conflict with late changes to the NFS tree. I addressed
   this by merging Deepa's original branch on top of the changes that
   now got merged into mainline and making sure the merge commit includes
   the necessary changes as produced by coccinelle.
 - A trivial conflict against the removal of staging/lustre.
 - Multiple conflicts against the VFS changes in the overlayfs tree.
   These are still part of linux-next, but apparently this is no longer
   intended for 4.18 [1], so I am ignoring that part.
 
 As Deepa writes:
 
   The series aims to switch vfs timestamps to use struct timespec64.
   Currently vfs uses struct timespec, which is not y2038 safe.
 
   The series involves the following:
   1. Add vfs helper functions for supporting struct timepec64 timestamps.
   2. Cast prints of vfs timestamps to avoid warnings after the switch.
   3. Simplify code using vfs timestamps so that the actual
      replacement becomes easy.
   4. Convert vfs timestamps to use struct timespec64 using a script.
      This is a flag day patch.
 
   Next steps:
   1. Convert APIs that can handle timespec64, instead of converting
      timestamps at the boundaries.
   2. Update internal data structures to avoid timestamp conversions.
 
 Thomas Gleixner adds:
 
   I think there is no point to drag that out for the next merge window.
   The whole thing needs to be done in one go for the core changes which
   means that you're going to play that catchup game forever. Let's get
   over with it towards the end of the merge window.
 
 [1] https://www.spinics.net/lists/linux-fsdevel/msg128294.html
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJbInZAAAoJEGCrR//JCVInReoQAIlVIIMt5ZX6wmaKbrjy9Itf
 MfgbFihQ/djLnuSPVQ3nztcxF0d66BKHZ9puVjz6+mIHqfDvJTRwZs9nU+sOF/T1
 g78fRkM1cxq6ZCkGYAbzyjyo5aC4PnSMP/NQLmwqvi0MXqqrbDoq5ZdP9DHJw39h
 L9lD8FM/P7T29Fgp9tq/pT5l9X8VU8+s5KQG1uhB5hii4VL6pD6JyLElDita7rg+
 Z7/V7jkxIGEUWF7vGaiR1QTFzEtpUA/exDf9cnsf51OGtK/LJfQ0oiZPPuq3oA/E
 LSbt8YQQObc+dvfnGxwgxEg1k5WP5ekj/Wdibv/+rQKgGyLOTz6Q4xK6r8F2ahxs
 nyZQBdXqHhJYyKr1H1reUH3mrSgQbE5U5R1i3My0xV2dSn+vtK5vgF21v2Ku3A1G
 wJratdtF/kVBzSEQUhsYTw14Un+xhBLRWzcq0cELonqxaKvRQK9r92KHLIWNE7/v
 c0TmhFbkZA+zR8HdsaL3iYf1+0W/eYy8PcvepyldKNeW2pVk3CyvdTfY2Z87G2XK
 tIkK+BUWbG3drEGG3hxZ3757Ln3a9qWyC5ruD3mBVkuug/wekbI8PykYJS7Mx4s/
 WNXl0dAL0Eeu1M8uEJejRAe1Q3eXoMWZbvCYZc+wAm92pATfHVcKwPOh8P7NHlfy
 A3HkjIBrKW5AgQDxfgvm
 =CZX2
 -----END PGP SIGNATURE-----

Merge tag 'vfs-timespec64' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground

Pull inode timestamps conversion to timespec64 from Arnd Bergmann:
 "This is a late set of changes from Deepa Dinamani doing an automated
  treewide conversion of the inode and iattr structures from 'timespec'
  to 'timespec64', to push the conversion from the VFS layer into the
  individual file systems.

  As Deepa writes:

   'The series aims to switch vfs timestamps to use struct timespec64.
    Currently vfs uses struct timespec, which is not y2038 safe.

    The series involves the following:
    1. Add vfs helper functions for supporting struct timepec64
       timestamps.
    2. Cast prints of vfs timestamps to avoid warnings after the switch.
    3. Simplify code using vfs timestamps so that the actual replacement
       becomes easy.
    4. Convert vfs timestamps to use struct timespec64 using a script.
       This is a flag day patch.

    Next steps:
    1. Convert APIs that can handle timespec64, instead of converting
       timestamps at the boundaries.
    2. Update internal data structures to avoid timestamp conversions'

  Thomas Gleixner adds:

   'I think there is no point to drag that out for the next merge
    window. The whole thing needs to be done in one go for the core
    changes which means that you're going to play that catchup game
    forever. Let's get over with it towards the end of the merge window'"

* tag 'vfs-timespec64' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/playground:
  pstore: Remove bogus format string definition
  vfs: change inode times to use struct timespec64
  pstore: Convert internal records to timespec64
  udf: Simplify calls to udf_disk_stamp_to_time
  fs: nfs: get rid of memcpys for inode times
  ceph: make inode time prints to be long long
  lustre: Use long long type to print inode time
  fs: add timespec64_truncate()
2018-06-15 07:31:07 +09:00

435 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/exportfs.h>
#include <linux/iomap.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/pr.h>
#include <linux/nfsd/debug.h>
#include <scsi/scsi_proto.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_request.h>
#include "blocklayoutxdr.h"
#include "pnfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
static __be32
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
u32 block_size = i_blocksize(inode);
struct pnfs_block_extent *bex;
struct iomap iomap;
u32 device_generation = 0;
int error;
if (seg->offset & (block_size - 1)) {
dprintk("pnfsd: I/O misaligned\n");
goto out_layoutunavailable;
}
/*
* Some clients barf on non-zero block numbers for NONE or INVALID
* layouts, so make sure to zero the whole structure.
*/
error = -ENOMEM;
bex = kzalloc(sizeof(*bex), GFP_KERNEL);
if (!bex)
goto out_error;
args->lg_content = bex;
error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
&iomap, seg->iomode != IOMODE_READ,
&device_generation);
if (error) {
if (error == -ENXIO)
goto out_layoutunavailable;
goto out_error;
}
if (iomap.length < args->lg_minlength) {
dprintk("pnfsd: extent smaller than minlength\n");
goto out_layoutunavailable;
}
switch (iomap.type) {
case IOMAP_MAPPED:
if (seg->iomode == IOMODE_READ)
bex->es = PNFS_BLOCK_READ_DATA;
else
bex->es = PNFS_BLOCK_READWRITE_DATA;
bex->soff = iomap.addr;
break;
case IOMAP_UNWRITTEN:
if (seg->iomode & IOMODE_RW) {
/*
* Crack monkey special case from section 2.3.1.
*/
if (args->lg_minlength == 0) {
dprintk("pnfsd: no soup for you!\n");
goto out_layoutunavailable;
}
bex->es = PNFS_BLOCK_INVALID_DATA;
bex->soff = iomap.addr;
break;
}
/*FALLTHRU*/
case IOMAP_HOLE:
if (seg->iomode == IOMODE_READ) {
bex->es = PNFS_BLOCK_NONE_DATA;
break;
}
/*FALLTHRU*/
case IOMAP_DELALLOC:
default:
WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
goto out_layoutunavailable;
}
error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
if (error)
goto out_error;
bex->foff = iomap.offset;
bex->len = iomap.length;
seg->offset = iomap.offset;
seg->length = iomap.length;
dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
return 0;
out_error:
seg->length = 0;
return nfserrno(error);
out_layoutunavailable:
seg->length = 0;
return nfserr_layoutunavailable;
}
static __be32
nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
struct iomap *iomaps, int nr_iomaps)
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
struct timespec ts;
int error;
ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
timespec_compare(&lcp->lc_mtime, &ts) < 0)
lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
if (new_size > i_size_read(inode)) {
iattr.ia_valid |= ATTR_SIZE;
iattr.ia_size = new_size;
}
error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
nr_iomaps, &iattr);
kfree(iomaps);
return nfserrno(error);
}
#ifdef CONFIG_NFSD_BLOCKLAYOUT
static int
nfsd4_block_get_device_info_simple(struct super_block *sb,
struct nfsd4_getdeviceinfo *gdp)
{
struct pnfs_block_deviceaddr *dev;
struct pnfs_block_volume *b;
dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
sizeof(struct pnfs_block_volume), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
dev->nr_volumes = 1;
b = &dev->volumes[0];
b->type = PNFS_BLOCK_VOLUME_SIMPLE;
b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
&b->simple.offset);
}
static __be32
nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
if (sb->s_bdev != sb->s_bdev->bd_contains)
return nfserr_inval;
return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
}
static __be32
nfsd4_block_proc_layoutcommit(struct inode *inode,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
const struct nfsd4_layout_ops bl_layout_ops = {
/*
* Pretend that we send notification to the client. This is a blatant
* lie to force recent Linux clients to cache our device IDs.
* We rarely ever change the device ID, so the harm of leaking deviceids
* for a while isn't too bad. Unfortunately RFC5661 is a complete mess
* in this regard, but I filed errata 4119 for this a while ago, and
* hopefully the Linux client will eventually start caching deviceids
* without this again.
*/
.notify_types =
NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
.proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
.encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
.proc_layoutget = nfsd4_block_proc_layoutget,
.encode_layoutget = nfsd4_block_encode_layoutget,
.proc_layoutcommit = nfsd4_block_proc_layoutcommit,
};
#endif /* CONFIG_NFSD_BLOCKLAYOUT */
#ifdef CONFIG_NFSD_SCSILAYOUT
static int nfsd4_scsi_identify_device(struct block_device *bdev,
struct pnfs_block_volume *b)
{
struct request_queue *q = bdev->bd_disk->queue;
struct request *rq;
struct scsi_request *req;
/*
* The allocation length (passed in bytes 3 and 4 of the INQUIRY
* command descriptor block) specifies the number of bytes that have
* been allocated for the data-in buffer.
* 252 is the highest one-byte value that is a multiple of 4.
* 65532 is the highest two-byte value that is a multiple of 4.
*/
size_t bufflen = 252, maxlen = 65532, len, id_len;
u8 *buf, *d, type, assoc;
int retries = 1, error;
if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
return -EINVAL;
again:
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(rq)) {
error = -ENOMEM;
goto out_free_buf;
}
req = scsi_req(rq);
error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
if (error)
goto out_put_request;
req->cmd[0] = INQUIRY;
req->cmd[1] = 1;
req->cmd[2] = 0x83;
req->cmd[3] = bufflen >> 8;
req->cmd[4] = bufflen & 0xff;
req->cmd_len = COMMAND_SIZE(INQUIRY);
blk_execute_rq(rq->q, NULL, rq, 1);
if (req->result) {
pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
req->result);
error = -EIO;
goto out_put_request;
}
len = (buf[2] << 8) + buf[3] + 4;
if (len > bufflen) {
if (len <= maxlen && retries--) {
blk_put_request(rq);
kfree(buf);
bufflen = len;
goto again;
}
pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
len);
goto out_put_request;
}
d = buf + 4;
for (d = buf + 4; d < buf + len; d += id_len + 4) {
id_len = d[3];
type = d[1] & 0xf;
assoc = (d[1] >> 4) & 0x3;
/*
* We only care about a EUI-64 and NAA designator types
* with LU association.
*/
if (assoc != 0x00)
continue;
if (type != 0x02 && type != 0x03)
continue;
if (id_len != 8 && id_len != 12 && id_len != 16)
continue;
b->scsi.code_set = PS_CODE_SET_BINARY;
b->scsi.designator_type = type == 0x02 ?
PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
b->scsi.designator_len = id_len;
memcpy(b->scsi.designator, d + 4, id_len);
/*
* If we found a 8 or 12 byte descriptor continue on to
* see if a 16 byte one is available. If we find a
* 16 byte descriptor we're done.
*/
if (id_len == 16)
break;
}
out_put_request:
blk_put_request(rq);
out_free_buf:
kfree(buf);
return error;
}
#define NFSD_MDS_PR_KEY 0x0100000000000000ULL
/*
* We use the client ID as a unique key for the reservations.
* This allows us to easily fence a client when recalls fail.
*/
static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
{
return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
}
static int
nfsd4_block_get_device_info_scsi(struct super_block *sb,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
struct pnfs_block_deviceaddr *dev;
struct pnfs_block_volume *b;
const struct pr_ops *ops;
int error;
dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
sizeof(struct pnfs_block_volume), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
dev->nr_volumes = 1;
b = &dev->volumes[0];
b->type = PNFS_BLOCK_VOLUME_SCSI;
b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
error = nfsd4_scsi_identify_device(sb->s_bdev, b);
if (error)
return error;
ops = sb->s_bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: device %s does not support PRs.\n",
sb->s_id);
return -EINVAL;
}
error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
if (error) {
pr_err("pNFS: failed to register key for device %s.\n",
sb->s_id);
return -EINVAL;
}
error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
if (error) {
pr_err("pNFS: failed to reserve device %s.\n",
sb->s_id);
return -EINVAL;
}
return 0;
}
static __be32
nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
struct svc_rqst *rqstp,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
if (sb->s_bdev != sb->s_bdev->bd_contains)
return nfserr_inval;
return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
}
static __be32
nfsd4_scsi_proc_layoutcommit(struct inode *inode,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
}
const struct nfsd4_layout_ops scsi_layout_ops = {
/*
* Pretend that we send notification to the client. This is a blatant
* lie to force recent Linux clients to cache our device IDs.
* We rarely ever change the device ID, so the harm of leaking deviceids
* for a while isn't too bad. Unfortunately RFC5661 is a complete mess
* in this regard, but I filed errata 4119 for this a while ago, and
* hopefully the Linux client will eventually start caching deviceids
* without this again.
*/
.notify_types =
NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
.proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
.encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
.proc_layoutget = nfsd4_block_proc_layoutget,
.encode_layoutget = nfsd4_block_encode_layoutget,
.proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
.fence_client = nfsd4_scsi_fence_client,
};
#endif /* CONFIG_NFSD_SCSILAYOUT */