From 1fcb748d187d0c7732a75a509e924ead6d070e04 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Oct 2018 00:36:12 +0100 Subject: [PATCH 01/26] amd-gpu: Don't undefine READ and WRITE Remove the undefinition of READ and WRITE because these constants may be used elsewhere in subsequently included header files, thus breaking them. These constants don't actually appear to be used in the driver, so the undefinition seems pointless. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: David Howells --- drivers/gpu/drm/amd/display/dc/os_types.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index a407892905af..c0d9f332baed 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -40,8 +40,6 @@ #define LITTLEENDIAN_CPU #endif -#undef READ -#undef WRITE #undef FRAME_SIZE #define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__) From 00e23707442a75b404392cef1405ab4fd498de6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 22 Oct 2018 13:07:28 +0100 Subject: [PATCH 02/26] iov_iter: Use accessor function Use accessor functions to access an iterator's type and direction. This allows for the possibility of using some other method of determining the type of iterator than if-chains with bitwise-AND conditions. Signed-off-by: David Howells --- block/bio.c | 2 +- fs/block_dev.c | 2 +- fs/ceph/file.c | 2 +- fs/cifs/file.c | 4 ++-- fs/cifs/misc.c | 2 +- fs/cifs/smbdirect.c | 17 +++++++++---- fs/direct-io.c | 2 +- fs/fuse/file.c | 2 +- fs/iomap.c | 2 +- include/linux/uio.h | 48 +++++++++++++++++++++++++------------ lib/iov_iter.c | 56 +++++++++++++++++++++---------------------- mm/filemap.c | 2 +- net/9p/trans_virtio.c | 2 +- net/tls/tls_sw.c | 4 ++-- 14 files changed, 87 insertions(+), 60 deletions(-) diff --git a/block/bio.c b/block/bio.c index 0093bed81c0e..c55f36bbe12a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1255,7 +1255,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || + if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { ret = bio_copy_from_iter(bio, iter); if (ret) diff --git a/fs/block_dev.c b/fs/block_dev.c index 38b8ce05cbc7..a80b4f0ee7c4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) dio->size = 0; dio->multi_bio = false; - dio->should_dirty = is_read && (iter->type == ITER_IOVEC); + dio->should_dirty = is_read && iter_is_iovec(iter); blk_start_plug(&plug); for (;;) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 92ab20433682..524ecc95b04d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -658,7 +658,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, if (ret < 0) return ret; - if (unlikely(to->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(to))) { size_t page_off; ret = iov_iter_get_pages_alloc(to, &pages, len, &page_off); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8d41ca7bfcf1..dcdbcb6f09f8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2990,7 +2990,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) size_t copy = min_t(size_t, remaining, PAGE_SIZE); size_t written; - if (unlikely(iter->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(iter))) { void *addr = kmap_atomic(page); written = copy_to_iter(addr, copy, iter); @@ -3302,7 +3302,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (to->type == ITER_IOVEC) + if (iter_is_iovec(to)) ctx->should_dirty = true; rc = setup_aio_ctx_iter(ctx, to, READ); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 6926685e513c..7b5b960a04b8 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -786,7 +786,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) struct page **pages = NULL; struct bio_vec *bv = NULL; - if (iter->type & ITER_KVEC) { + if (iov_iter_is_kvec(iter)) { memcpy(&ctx->iter, iter, sizeof(struct iov_iter)); ctx->len = count; iov_iter_advance(iter, count); diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5fdb9a509a97..5b05bf255268 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) info->smbd_recv_pending++; - switch (msg->msg_iter.type) { - case READ | ITER_KVEC: + if (iov_iter_rw(&msg->msg_iter) == WRITE) { + /* It's a bug in upper layer to get there */ + cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n", + iov_iter_rw(&msg->msg_iter)); + rc = -EINVAL; + goto out; + } + + switch (iov_iter_type(&msg->msg_iter)) { + case ITER_KVEC: buf = msg->msg_iter.kvec->iov_base; to_read = msg->msg_iter.kvec->iov_len; rc = smbd_recv_buf(info, buf, to_read); break; - case READ | ITER_BVEC: + case ITER_BVEC: page = msg->msg_iter.bvec->bv_page; page_offset = msg->msg_iter.bvec->bv_offset; to_read = msg->msg_iter.bvec->bv_len; @@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) default: /* It's a bug in upper layer to get there */ cifs_dbg(VFS, "CIFS: invalid msg type %d\n", - msg->msg_iter.type); + iov_iter_type(&msg->msg_iter)); rc = -EINVAL; } +out: info->smbd_recv_pending--; wake_up(&info->wait_smbd_recv_pending); diff --git a/fs/direct-io.c b/fs/direct-io.c index 093fb54cd316..722d17c88edb 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; - dio->should_dirty = (iter->type == ITER_IOVEC); + dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; sdio.iter = iter; sdio.final_block_in_request = end >> blkbits; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 32d0b883e74f..c9ccd45156dc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1271,7 +1271,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, ssize_t ret = 0; /* Special case for kernel I/O: can copy directly into the buffer */ - if (ii->type & ITER_KVEC) { + if (iov_iter_is_kvec(ii)) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); diff --git a/fs/iomap.c b/fs/iomap.c index ec15cf2ec696..2c53400aa802 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1795,7 +1795,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (pos >= dio->i_size) goto out_free_dio; - if (iter->type == ITER_IOVEC) + if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ) dio->flags |= IOMAP_DIO_DIRTY; } else { flags |= IOMAP_WRITE; diff --git a/include/linux/uio.h b/include/linux/uio.h index 422b1c01ee0d..fcabc959c794 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,7 +21,7 @@ struct kvec { size_t iov_len; }; -enum { +enum iter_type { ITER_IOVEC = 0, ITER_KVEC = 2, ITER_BVEC = 4, @@ -47,6 +47,36 @@ struct iov_iter { }; }; +static inline enum iter_type iov_iter_type(const struct iov_iter *i) +{ + return i->type & ~(READ | WRITE); +} + +static inline bool iter_is_iovec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_IOVEC; +} + +static inline bool iov_iter_is_kvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_KVEC; +} + +static inline bool iov_iter_is_bvec(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_BVEC; +} + +static inline bool iov_iter_is_pipe(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_PIPE; +} + +static inline unsigned char iov_iter_rw(const struct iov_iter *i) +{ + return i->type & (READ | WRITE); +} + /* * Total number of bytes covered by an iovec. * @@ -74,7 +104,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ - if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ + if (iov_iter_type(start) == ITER_IOVEC || \ + iov_iter_type(start) == ITER_KVEC) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 1); \ @@ -202,19 +233,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i) return i->count; } -static inline bool iter_is_iovec(const struct iov_iter *i) -{ - return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); -} - -/* - * Get one of READ or WRITE out of iter->type without any other flags OR'd in - * with it. - * - * The ?: is just for type safety. - */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) - /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8be175df3075..42d39116a556 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -558,7 +558,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); @@ -658,7 +658,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); @@ -692,7 +692,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -712,7 +712,7 @@ EXPORT_SYMBOL(_copy_from_iter); bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return false; } @@ -739,7 +739,7 @@ EXPORT_SYMBOL(_copy_from_iter_full); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -773,7 +773,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache); size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return false; } @@ -836,7 +836,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else if (likely(!(i->type & ITER_PIPE))) + } else if (likely(!iov_iter_is_pipe(i))) return copy_page_to_iter_iovec(page, offset, bytes, i); else return copy_page_to_iter_pipe(page, offset, bytes, i); @@ -848,7 +848,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, { if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -888,7 +888,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i) size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_zero(bytes, i); iterate_and_advance(i, bytes, v, clear_user(v.iov_base, v.iov_len), @@ -908,7 +908,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, kunmap_atomic(kaddr); return 0; } - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { kunmap_atomic(kaddr); WARN_ON(1); return 0; @@ -972,7 +972,7 @@ static void pipe_advance(struct iov_iter *i, size_t size) void iov_iter_advance(struct iov_iter *i, size_t size) { - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { pipe_advance(i, size); return; } @@ -987,7 +987,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) if (WARN_ON(unroll > MAX_RW_COUNT)) return; i->count += unroll; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; int idx = i->idx; size_t off = i->iov_offset; @@ -1016,7 +1016,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) return; } unroll -= i->iov_offset; - if (i->type & ITER_BVEC) { + if (iov_iter_is_bvec(i)) { const struct bio_vec *bvec = i->bvec; while (1) { size_t n = (--bvec)->bv_len; @@ -1049,11 +1049,11 @@ EXPORT_SYMBOL(iov_iter_revert); */ size_t iov_iter_single_seg_count(const struct iov_iter *i) { - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return i->count; // it is a silly place, anyway if (i->nr_segs == 1) return i->count; - else if (i->type & ITER_BVEC) + else if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); else return min(i->count, i->iov->iov_len - i->iov_offset); @@ -1106,7 +1106,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) unsigned long res = 0; size_t size = i->count; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) return size | i->iov_offset; return size; @@ -1125,7 +1125,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) unsigned long res = 0; size_t size = i->count; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return ~0U; } @@ -1193,7 +1193,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages(i, pages, maxsize, maxpages, start); iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; @@ -1205,7 +1205,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages); if (unlikely(res < 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1270,7 +1270,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (maxsize > i->count) maxsize = i->count; - if (unlikely(i->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages_alloc(i, pages, maxsize, start); iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; @@ -1283,7 +1283,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p); if (unlikely(res < 0)) { kvfree(p); return res; @@ -1313,7 +1313,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } @@ -1355,7 +1355,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return false; } @@ -1400,7 +1400,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); /* for now */ return 0; } @@ -1443,7 +1443,7 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (!size) return 0; - if (unlikely(i->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; size_t off; int idx; @@ -1481,11 +1481,11 @@ EXPORT_SYMBOL(iov_iter_npages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) { *new = *old; - if (unlikely(new->type & ITER_PIPE)) { + if (unlikely(iov_iter_is_pipe(new))) { WARN_ON(1); return NULL; } - if (new->type & ITER_BVEC) + if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), flags); diff --git a/mm/filemap.c b/mm/filemap.c index 52517f28e6f4..bdeee0168ea7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2122,7 +2122,7 @@ find_page: !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; /* pipes can't handle partially uptodate pages */ - if (unlikely(iter->type & ITER_PIPE)) + if (unlikely(iov_iter_is_pipe(iter))) goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 7728b0acde09..4d7d2070e9c8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -322,7 +322,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, if (!iov_iter_count(data)) return 0; - if (!(data->type & ITER_KVEC)) { + if (iov_iter_is_kvec(data)) { int n; /* * We allow only p9_max_pages pinned. We wait for the diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a525fc4c2a4b..ad64b9c8b600 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send); bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; unsigned char record_type = TLS_RECORD_TYPE_DATA; - bool is_kvec = msg->msg_iter.type & ITER_KVEC; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); size_t try_to_copy, copied = 0; struct sk_msg *msg_pl, *msg_en; @@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk, bool cmsg = false; int target, err = 0; long timeo; - bool is_kvec = msg->msg_iter.type & ITER_KVEC; + bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); int num_async = 0; flags |= nonblock; From aa563d7bca6e882ec2bdae24603c8f016401a144 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: [PATCH 03/26] iov_iter: Separate type from direction and use accessor functions In the iov_iter struct, separate the iterator type from the iterator direction and use accessor functions to access them in most places. Convert a bunch of places to use switch-statements to access them rather then chains of bitwise-AND statements. This makes it easier to add further iterator types. Also, this can be more efficient as to implement a switch of small contiguous integers, the compiler can use ~50% fewer compare instructions than it has to use bitwise-and instructions. Further, cease passing the iterator type into the iterator setup function. The iterator function can set that itself. Only the direction is required. Signed-off-by: David Howells --- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/loop.c | 9 ++++---- drivers/block/nbd.c | 12 +++++----- drivers/fsi/fsi-sbefifo.c | 4 ++-- drivers/isdn/mISDN/l1oip_core.c | 3 +-- drivers/misc/vmw_vmci/vmci_queue_pair.c | 6 ++--- drivers/nvme/target/io-cmd-file.c | 2 +- drivers/target/iscsi/iscsi_target_util.c | 6 ++--- drivers/target/target_core_file.c | 6 ++--- drivers/usb/usbip/usbip_common.c | 2 +- drivers/xen/pvcalls-back.c | 8 +++---- fs/9p/vfs_addr.c | 4 ++-- fs/9p/vfs_dir.c | 2 +- fs/9p/xattr.c | 4 ++-- fs/afs/rxrpc.c | 15 ++++++------- fs/ceph/file.c | 5 ++--- fs/cifs/connect.c | 4 ++-- fs/cifs/misc.c | 2 +- fs/cifs/smb2ops.c | 4 ++-- fs/cifs/transport.c | 8 +++---- fs/dlm/lowcomms.c | 2 +- fs/nfsd/vfs.c | 4 ++-- fs/ocfs2/cluster/tcp.c | 2 +- fs/orangefs/inode.c | 2 +- fs/splice.c | 7 +++--- include/linux/uio.h | 10 ++++----- lib/iov_iter.c | 28 +++++++++++++----------- mm/page_io.c | 2 +- net/9p/client.c | 2 +- net/bluetooth/6lowpan.c | 2 +- net/bluetooth/a2mp.c | 2 +- net/bluetooth/smp.c | 2 +- net/ceph/messenger.c | 6 ++--- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/smc/smc_clc.c | 4 ++-- net/socket.c | 6 ++--- net/sunrpc/svcsock.c | 2 +- net/tipc/topsrv.c | 2 +- net/tls/tls_device.c | 4 ++-- 40 files changed, 96 insertions(+), 105 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ef8212a4b73e..ded9735d44af 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ? */ - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size); if (sock == connection->data.socket) { rcu_read_lock(); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 75f6b47169e6..fcc70642b004 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag struct msghdr msg = { .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL) }; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); return sock_recvmsg(sock, &msg, msg.msg_flags); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ea9debf59b22..cb0cc8685076 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -268,7 +268,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) struct iov_iter i; ssize_t bw; - iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len); + iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); file_start_write(file); bw = vfs_iter_write(file, &i, ppos, 0); @@ -346,7 +346,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, ssize_t len; rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) return len; @@ -387,7 +387,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq, b.bv_offset = 0; b.bv_len = bvec.bv_len; - iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); + iov_iter_bvec(&i, READ, &b, 1, b.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) { ret = len; @@ -554,8 +554,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, } atomic_set(&cmd->ref, 2); - iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, - segments, blk_rq_bytes(rq)); + iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq)); iter.iov_offset = offset; cmd->iocb.ki_pos = pos; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 14a51254c3db..4d4d6129ff66 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) u32 nbd_cmd_flags = 0; int sent = nsock->sent, skip = 0; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); switch (req_op(req)) { case REQ_OP_DISCARD: @@ -564,8 +564,7 @@ send_pages: dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", req, bvec.bv_len); - iov_iter_bvec(&from, ITER_BVEC | WRITE, - &bvec, 1, bvec.bv_len); + iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len); if (skip) { if (skip >= iov_iter_count(&from)) { skip -= iov_iter_count(&from); @@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) int ret = 0; reply.magic = 0; - iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { if (!nbd_disconnected(config)) @@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - iov_iter_bvec(&to, ITER_BVEC | READ, - &bvec, 1, bvec.bv_len); + iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", @@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd) for (i = 0; i < config->num_connections; i++) { struct nbd_sock *nsock = config->socks[i]; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); mutex_lock(&nsock->tx_lock); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c index ae861342626e..d92f5b87c251 100644 --- a/drivers/fsi/fsi-sbefifo.c +++ b/drivers/fsi/fsi-sbefifo.c @@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo) } ffdc_iov.iov_base = ffdc; ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE; - iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); + iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); cmd[0] = cpu_to_be32(2); cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC); rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter); @@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len, rbytes = (*resp_len) * sizeof(__be32); resp_iov.iov_base = response; resp_iov.iov_len = rbytes; - iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes); + iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes); /* Perform the command */ mutex_lock(&sbefifo->lock); diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b05022f94f18..072bb5e36c18 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -718,8 +718,7 @@ l1oip_socket_thread(void *data) printk(KERN_DEBUG "%s: socket created and open\n", __func__); while (!signal_pending(current)) { - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, - recvbuf_size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size); recvlen = sock_recvmsg(socket, &msg, 0); if (recvlen > 0) { l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index bd52f29b4a4e..264f4ed8eef2 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&from, WRITE, &v, 1, buf_size); qp_lock(qpair); @@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&to, READ, &v, 1, buf_size); qp_lock(qpair); @@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair, if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; - iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + iov_iter_kvec(&to, READ, &v, 1, buf_size); qp_lock(qpair); diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 81a9dc5290a8..97f282eca3c2 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, rw = READ; } - iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count); + iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count); iocb->ki_pos = pos; iocb->ki_filp = req->ns->file; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 49be1e41290c..991482576417 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1258,8 +1258,7 @@ static int iscsit_do_rx_data( return -1; memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, - count->iov, count->iov_count, data); + iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data); while (msg_data_left(&msg)) { rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL); @@ -1315,8 +1314,7 @@ int tx_data( memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, - iov, iov_count, data); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data); while (msg_data_left(&msg)) { int tx_loop = sock_sendmsg(conn->sock, &msg); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 16751ae55d7b..49b110d1b972 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, len += sg->length; } - iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len); + iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len); aio_cmd->cmd = cmd; aio_cmd->len = len; @@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, len += sg->length; } - iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len); + iov_iter_bvec(&iter, READ, bvec, sgl_nents, len); if (is_write) ret = vfs_iter_write(fd, &iter, &pos, 0); else @@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd) len += se_dev->dev_attrib.block_size; } - iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len); + iov_iter_bvec(&iter, READ, bvec, nolb, len); ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0); kfree(bvec); diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 9756752c0681..45da3e01c7b0 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size) if (!sock || !buf || !size) return -EINVAL; - iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size); usbip_dbg_xmit("enter\n"); diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index b1092fbefa63..2e5d845b5091 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque) if (masked_prod < masked_cons) { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = wanted; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted); + iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted); } else { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = array_size - masked_prod; vec[1].iov_base = data->in; vec[1].iov_len = wanted - vec[0].iov_len; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted); + iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted); } atomic_set(&map->read, 0); @@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = size; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size); + iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size); } else { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); vec[1].iov_base = data->out; vec[1].iov_len = size - vec[0].iov_len; - iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size); + iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size); } atomic_set(&map->write, 0); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index e1cbdfdb7c68..0bcbcc20f769 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) if (retval == 0) return retval; - iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE); + iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE); retval = p9_client_read(fid, page_offset(page), &to, &err); if (err) { @@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page) bvec.bv_page = page; bvec.bv_offset = 0; bvec.bv_len = len; - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len); + iov_iter_bvec(&from, WRITE, &bvec, 1, len); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index b0405d6aac85..d5db3c968a03 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -133,7 +133,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (rdir->tail == rdir->head) { struct iov_iter to; int n; - iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen); + iov_iter_kvec(&to, READ, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); if (err) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 352abc39e891..ac8ff8ca4c11 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, struct iov_iter to; int err; - iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size); + iov_iter_kvec(&to, READ, &kvec, 1, buffer_size); attr_fid = p9_client_xattrwalk(fid, name, &attr_size); if (IS_ERR(attr_fid)) { @@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, struct iov_iter from; int retval, err; - iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len); + iov_iter_kvec(&from, WRITE, &kvec, 1, value_len); p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", name, value_len, flags); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 77a83790a31f..639c16882e93 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -286,7 +286,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, offset = 0; } - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); + iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes); } /* @@ -401,8 +401,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, - call->request_size); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); @@ -432,7 +431,7 @@ error_do_abort: rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, &msg.msg_iter, false, &call->abort_code, &call->service_id); @@ -468,7 +467,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (state == AFS_CALL_SV_AWAIT_ACK) { struct iov_iter iter; - iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&iter, READ, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, call->rxcall, &iter, false, &remote_abort, @@ -825,7 +824,7 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -864,7 +863,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -905,7 +904,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, iov.iov_base = buf + call->offset; iov.iov_len = count - call->offset; - iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset); + iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset); ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter, want_more, &remote_abort, diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 524ecc95b04d..5dd433aa9b23 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -821,7 +821,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) aio_req->total_len = rc + zlen; } - iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs, + iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs, osd_data->num_bvecs, osd_data->bvec_pos.iter.bi_size); iov_iter_advance(&i, rc); @@ -1044,8 +1044,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int zlen = min_t(size_t, len - ret, size - pos - ret); - iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages, - len); + iov_iter_bvec(&i, READ, bvecs, num_pages, len); iov_iter_advance(&i, ret); iov_iter_zero(zlen, &i); ret += zlen; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 52d71b64c0c6..11bcd2fb90b1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -588,7 +588,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, { struct msghdr smb_msg; struct kvec iov = {.iov_base = buf, .iov_len = to_read}; - iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read); + iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read); return cifs_readv_from_socket(server, &smb_msg); } @@ -600,7 +600,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, struct msghdr smb_msg; struct bio_vec bv = { .bv_page = page, .bv_len = to_read, .bv_offset = page_offset}; - iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read); + iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read); return cifs_readv_from_socket(server, &smb_msg); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 7b5b960a04b8..10cff44832d8 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -857,7 +857,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) ctx->bv = bv; ctx->len = saved_len - count; ctx->npages = npages; - iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len); + iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len); return 0; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 89985a0a6819..26f8a65b8722 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2962,13 +2962,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len); + iov_iter_bvec(&iter, WRITE, bvec, npages, data_len); } else if (buf_len >= data_offset + data_len) { /* read response payload is in buf */ WARN_ONCE(npages > 0, "read data can be either in buf or in pages"); iov.iov_base = buf + data_offset; iov.iov_len = data_len; - iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len); + iov_iter_kvec(&iter, WRITE, &iov, 1, data_len); } else { /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b48f43963da6..146b618802a5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -307,8 +307,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, .iov_base = &rfc1002_marker, .iov_len = 4 }; - iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov, - 1, 4); + iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) goto uncork; @@ -329,8 +328,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, size += iov[i].iov_len; } - iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, - iov, n_vec, size); + iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) @@ -346,7 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, rqst_page_get_length(&rqst[j], i, &bvec.bv_len, &bvec.bv_offset); - iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC, + iov_iter_bvec(&smb_msg.msg_iter, WRITE, &bvec, 1, bvec.bv_len); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index a5e4a221435c..76976d6e50f9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con) nvec = 2; } len = iov[0].iov_len + iov[1].iov_len; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len); + iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len); r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); if (ret <= 0) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b53e76391e52..8b90f5480904 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -923,7 +923,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp, int host_err; trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count); + iov_iter_kvec(&iter, READ, vec, vlen, *count); host_err = vfs_iter_read(file, &iter, &offset, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err); } @@ -999,7 +999,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (stable && !use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt); + iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 7d9eea7d4a87..e9f236af1927 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) { struct kvec vec = { .iov_len = len, .iov_base = data, }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len); + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len); return sock_recvmsg(sock, &msg, MSG_DONTWAIT); } diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 31932879b716..136a8bdc1d91 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -25,7 +25,7 @@ static int read_one_page(struct page *page) struct iov_iter to; struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE}; - iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE); gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpage called with page %p\n", diff --git a/fs/splice.c b/fs/splice.c index b3daa971f597..3553f1956508 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct kiocb kiocb; int idx, ret; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); + iov_iter_pipe(&to, READ, pipe, len); idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; @@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, */ offset = *ppos & ~PAGE_MASK; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); + iov_iter_pipe(&to, READ, pipe, len + offset); res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); if (res <= 0) @@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, left -= this_len; } - iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n, - sd.total_len - left); + iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); ret = vfs_iter_write(out, &from, &sd.pos, 0); if (ret <= 0) break; diff --git a/include/linux/uio.h b/include/linux/uio.h index fcabc959c794..3d8f1acc142c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -29,7 +29,7 @@ enum iter_type { }; struct iov_iter { - int type; + unsigned int type; size_t iov_offset; size_t count; union { @@ -212,13 +212,13 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); -void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); -void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); -void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); -void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 42d39116a556..c42b928b15ef 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -428,17 +428,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) } EXPORT_SYMBOL(iov_iter_fault_in_readable); -void iov_iter_init(struct iov_iter *i, int direction, +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) { + WARN_ON(direction & ~(READ | WRITE)); + direction &= READ | WRITE; + /* It will get better. Eventually... */ if (uaccess_kernel()) { - direction |= ITER_KVEC; - i->type = direction; + i->type = ITER_KVEC | direction; i->kvec = (struct kvec *)iov; } else { - i->type = direction; + i->type = ITER_IOVEC | direction; i->iov = iov; } i->nr_segs = nr_segs; @@ -1060,12 +1062,12 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_single_seg_count); -void iov_iter_kvec(struct iov_iter *i, int direction, +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count) { - BUG_ON(!(direction & ITER_KVEC)); - i->type = direction; + WARN_ON(direction & ~(READ | WRITE)); + i->type = ITER_KVEC | (direction & (READ | WRITE)); i->kvec = kvec; i->nr_segs = nr_segs; i->iov_offset = 0; @@ -1073,12 +1075,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_kvec); -void iov_iter_bvec(struct iov_iter *i, int direction, +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count) { - BUG_ON(!(direction & ITER_BVEC)); - i->type = direction; + WARN_ON(direction & ~(READ | WRITE)); + i->type = ITER_BVEC | (direction & (READ | WRITE)); i->bvec = bvec; i->nr_segs = nr_segs; i->iov_offset = 0; @@ -1086,13 +1088,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_bvec); -void iov_iter_pipe(struct iov_iter *i, int direction, +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count) { - BUG_ON(direction != ITER_PIPE); + BUG_ON(direction != READ); WARN_ON(pipe->nrbufs == pipe->buffers); - i->type = direction; + i->type = ITER_PIPE | READ; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); i->iov_offset = 0; diff --git a/mm/page_io.c b/mm/page_io.c index aafd19ec1db4..86de453a60cf 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, }; struct iov_iter from; - iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); diff --git a/net/9p/client.c b/net/9p/client.c index deae53a7dffc..a9cd1401bd09 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -2070,7 +2070,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) struct kvec kv = {.iov_base = data, .iov_len = count}; struct iov_iter to; - iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count); + iov_iter_kvec(&to, READ, &kv, 1, count); p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", fid->fid, (unsigned long long) offset, count); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 4e2576fc0c59..828e87fe8027 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, iv.iov_len = skb->len; memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len); + iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len); err = l2cap_chan_send(chan, &msg, skb->len); if (err > 0) { diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 51c2cf2d8923..58fc6333d412 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len); + iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len); l2cap_chan_send(chan, &msg, total_len); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index a1c1b7e8a45c..c822e626761b 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) memset(&msg, 0, sizeof(msg)); - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len); + iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len); l2cap_chan_send(chan, &msg, 1 + len); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0a187196aeed..e493ff77b378 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -526,7 +526,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) if (!buf) msg.msg_flags |= MSG_TRUNC; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -545,7 +545,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int r; BUG_ON(page_offset + length > PAGE_SIZE); - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -607,7 +607,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); + iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size); ret = sock_sendmsg(sock, &msg); if (ret == -EAGAIN) ret = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d4020c5e831d..2526be6b3d90 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) EnterFunction(7); /* Receive a packet */ - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen); len = sock_recvmsg(sock, &msg, MSG_DONTWAIT); if (len < 0) return len; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 52241d679cc9..89c3a8c7859a 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, */ krflags = MSG_PEEK | MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, sizeof(struct smc_clc_msg_hdr)); len = sock_recvmsg(smc->clcsock, &msg, krflags); if (signal_pending(current)) { @@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen); + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { diff --git a/net/socket.c b/net/socket.c index b68801c7d0ab..fae408abea54 100644 --- a/net/socket.c +++ b/net/socket.c @@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); @@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (!sock->ops->sendmsg_locked) return sock_no_sendmsg_locked(sk, msg, size); - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); } @@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, mm_segment_t oldfs = get_fs(); int result; - iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); + iov_iter_kvec(&msg->msg_iter, READ, vec, num, size); set_fs(KERNEL_DS); result = sock_recvmsg(sock, msg, flags); set_fs(oldfs); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5445145e639c..0b46ec0bf74e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -338,7 +338,7 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen); + iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags); /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d8956f7daac4..afa02eeec403 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) iov.iov_base = &s; iov.iov_len = sizeof(s); msg.msg_name = NULL; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len); ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 276edbc04f38..d753e362d2d9 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, iov.iov_base = kaddr + offset; iov.iov_len = size; - iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); rc = tls_push_data(sk, &msg_iter, size, flags, TLS_RECORD_TYPE_DATA); kunmap(page); @@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) { struct iov_iter msg_iter; - iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0); + iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0); return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); } From 9ea9ce0427aab02a2fd88fc608267cf6952119f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: [PATCH 04/26] iov_iter: Add I/O discard iterator Add a new iterator, ITER_DISCARD, that can only be used in READ mode and just discards any data copied to it. This is useful in a network filesystem for discarding any unwanted data sent by a server. Signed-off-by: David Howells --- include/linux/uio.h | 7 ++++++ lib/iov_iter.c | 55 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 3d8f1acc142c..55ce99ddb912 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -26,6 +26,7 @@ enum iter_type { ITER_KVEC = 2, ITER_BVEC = 4, ITER_PIPE = 8, + ITER_DISCARD = 16, }; struct iov_iter { @@ -72,6 +73,11 @@ static inline bool iov_iter_is_pipe(const struct iov_iter *i) return iov_iter_type(i) == ITER_PIPE; } +static inline bool iov_iter_is_discard(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_DISCARD; +} + static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->type & (READ | WRITE); @@ -220,6 +226,7 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_ unsigned long nr_segs, size_t count); void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, size_t count); +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c42b928b15ef..7ebccb5c1637 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -83,6 +83,7 @@ const struct kvec *kvec; \ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else if (unlikely(i->type & ITER_DISCARD)) { \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -114,6 +115,8 @@ } \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ + } else if (unlikely(i->type & ITER_DISCARD)) { \ + skip += n; \ } else { \ const struct iovec *iov; \ struct iovec v; \ @@ -838,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else if (likely(!iov_iter_is_pipe(i))) + } else if (unlikely(iov_iter_is_discard(i))) + return bytes; + else if (likely(!iov_iter_is_pipe(i))) return copy_page_to_iter_iovec(page, offset, bytes, i); else return copy_page_to_iter_pipe(page, offset, bytes, i); @@ -850,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, { if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return 0; } @@ -910,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, kunmap_atomic(kaddr); return 0; } - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { kunmap_atomic(kaddr); WARN_ON(1); return 0; @@ -978,6 +983,10 @@ void iov_iter_advance(struct iov_iter *i, size_t size) pipe_advance(i, size); return; } + if (unlikely(iov_iter_is_discard(i))) { + i->count -= size; + return; + } iterate_and_advance(i, size, v, 0, 0, 0) } EXPORT_SYMBOL(iov_iter_advance); @@ -1013,6 +1022,8 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) pipe_truncate(i); return; } + if (unlikely(iov_iter_is_discard(i))) + return; if (unroll <= i->iov_offset) { i->iov_offset -= unroll; return; @@ -1055,6 +1066,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) return i->count; // it is a silly place, anyway if (i->nr_segs == 1) return i->count; + if (unlikely(iov_iter_is_discard(i))) + return i->count; else if (iov_iter_is_bvec(i)) return min(i->count, i->bvec->bv_len - i->iov_offset); else @@ -1103,6 +1116,24 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_pipe); +/** + * iov_iter_discard - Initialise an I/O iterator that discards data + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator that just discards everything that's written to it. + * It's only available as a READ iterator. + */ +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) +{ + BUG_ON(direction != READ); + i->type = ITER_DISCARD | READ; + i->count = count; + i->iov_offset = 0; +} +EXPORT_SYMBOL(iov_iter_discard); + unsigned long iov_iter_alignment(const struct iov_iter *i) { unsigned long res = 0; @@ -1127,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) unsigned long res = 0; size_t size = i->count; - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return ~0U; } @@ -1197,6 +1228,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages(i, pages, maxsize, maxpages, start); + if (unlikely(iov_iter_is_discard(i))) + return -EFAULT; + iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -1274,6 +1308,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, if (unlikely(iov_iter_is_pipe(i))) return pipe_get_pages_alloc(i, pages, maxsize, start); + if (unlikely(iov_iter_is_discard(i))) + return -EFAULT; + iterate_all_kinds(i, maxsize, v, ({ unsigned long addr = (unsigned long)v.iov_base; size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); @@ -1315,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return 0; } @@ -1357,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return false; } @@ -1402,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, __wsum sum, next; size_t off = 0; sum = *csum; - if (unlikely(iov_iter_is_pipe(i))) { + if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; } @@ -1444,6 +1481,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (!size) return 0; + if (unlikely(iov_iter_is_discard(i))) + return 0; if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; @@ -1487,6 +1526,8 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) WARN_ON(1); return NULL; } + if (unlikely(iov_iter_is_discard(new))) + return NULL; if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), From 160cb9574b550426122422444b8f19d614505f81 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: [PATCH 05/26] afs: Better tracing of protocol errors Include the site of detection of AFS protocol errors in trace lines to better be able to determine what went wrong. Signed-off-by: David Howells --- fs/afs/cmservice.c | 6 +- fs/afs/fsclient.c | 117 ++++++++++++++++++++++--------------- fs/afs/inode.c | 2 +- fs/afs/internal.h | 2 +- fs/afs/rxrpc.c | 5 +- fs/afs/vlclient.c | 30 ++++++---- include/trace/events/afs.h | 54 ++++++++++++++--- 7 files changed, 146 insertions(+), 70 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 9e51d6fe7e8f..58f79301a716 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -189,7 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_fid_count); call->buffer = kmalloc(array3_size(call->count, 3, 4), GFP_KERNEL); @@ -234,7 +235,8 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_count); call->offset = 0; call->unmarshall++; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 50929cb91732..d9a5815945dc 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -233,7 +233,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); } /* @@ -399,9 +399,10 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -580,9 +581,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, req) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req); + if (ret < 0) + return ret; xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); @@ -733,10 +735,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || - afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -839,9 +844,10 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -929,10 +935,13 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || - afs_decode_status(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1016,10 +1025,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) || - afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1122,13 +1134,16 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); - if (new_dvnode != orig_dvnode && - afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + if (new_dvnode != orig_dvnode) { + ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL); + if (ret < 0) + return ret; + } /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1231,9 +1246,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ afs_pages_written_back(vnode, call); @@ -1407,9 +1423,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (afs_decode_status(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -1612,7 +1629,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_volname_len); call->offset = 0; call->unmarshall++; @@ -1659,7 +1677,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_offline_msg_len); call->offset = 0; call->unmarshall++; @@ -1706,7 +1725,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_motd_len); call->offset = 0; call->unmarshall++; @@ -2109,8 +2129,10 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - afs_decode_status(call, &bp, status, vnode, - &call->expected_version, NULL); + ret = afs_decode_status(call, &bp, status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; callback[call->count].version = ntohl(bp[0]); callback[call->count].expiry = ntohl(bp[1]); callback[call->count].type = ntohl(bp[2]); @@ -2206,7 +2228,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) tmp = ntohl(call->tmp); _debug("status count: %u/%u", tmp, call->count2); if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_count); call->count = 0; call->unmarshall++; @@ -2221,10 +2244,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; statuses = call->reply[1]; - if (afs_decode_status(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL) < 0) - return afs_protocol_error(call, -EBADMSG); + ret = afs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL); + if (ret < 0) + return ret; call->count++; if (call->count < call->count2) @@ -2244,7 +2268,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_cb_count); call->count = 0; call->unmarshall++; more_cbs: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 479b7fdda124..ab4e7a15c205 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key) default: printk("kAFS: AFS vnode with undefined type\n"); read_sequnlock_excl(&vnode->cb_lock); - return afs_protocol_error(NULL, -EBADMSG); + return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } inode->i_blocks = 0; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 72de1f157d20..73770a2a2311 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -931,7 +931,7 @@ extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -extern int afs_protocol_error(struct afs_call *, int); +extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); static inline int afs_transfer_reply(struct afs_call *call) { diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 639c16882e93..9757c2d953f1 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -939,8 +939,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, /* * Log protocol error production. */ -noinline int afs_protocol_error(struct afs_call *call, int error) +noinline int afs_protocol_error(struct afs_call *call, int error, + enum afs_eproto_cause cause) { - trace_afs_protocol_error(call, error, __builtin_return_address(0)); + trace_afs_protocol_error(call, error, cause); return error; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index c3b740813fc7..d0f95c4ab05e 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -451,7 +451,8 @@ again: call->count2 = ntohl(*bp); /* Type or next count */ if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_num); alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); if (!alist) @@ -475,7 +476,8 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_type); } size += sizeof(__be32); @@ -488,18 +490,21 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt4_len); afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt6_len); afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_fsendpt_type); } /* Got either the type of the next entry or the count of @@ -518,7 +523,8 @@ again: if (!call->count) goto end; if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); call->unmarshall = 3; @@ -546,7 +552,8 @@ again: size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); } if (call->count > 1) @@ -559,16 +566,19 @@ again: switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt4_len); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt6_len); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG); + return afs_protocol_error(call, -EBADMSG, + afs_eproto_yvl_vlendpt_type); } /* Got either the type of the next entry or the count of diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index d0a341bc4540..5c60ade2c7d8 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -84,6 +84,25 @@ enum afs_edit_dir_reason { afs_edit_dir_for_unlink, }; +enum afs_eproto_cause { + afs_eproto_bad_status, + afs_eproto_cb_count, + afs_eproto_cb_fid_count, + afs_eproto_file_type, + afs_eproto_ibulkst_cb_count, + afs_eproto_ibulkst_count, + afs_eproto_motd_len, + afs_eproto_offline_msg_len, + afs_eproto_volname_len, + afs_eproto_yvl_fsendpt4_len, + afs_eproto_yvl_fsendpt6_len, + afs_eproto_yvl_fsendpt_num, + afs_eproto_yvl_fsendpt_type, + afs_eproto_yvl_vlendpt4_len, + afs_eproto_yvl_vlendpt6_len, + afs_eproto_yvl_vlendpt_type, +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -146,6 +165,24 @@ enum afs_edit_dir_reason { EM(afs_edit_dir_for_symlink, "Symlnk") \ E_(afs_edit_dir_for_unlink, "Unlink") +#define afs_eproto_causes \ + EM(afs_eproto_bad_status, "BadStatus") \ + EM(afs_eproto_cb_count, "CbCount") \ + EM(afs_eproto_cb_fid_count, "CbFidCount") \ + EM(afs_eproto_file_type, "FileTYpe") \ + EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \ + EM(afs_eproto_ibulkst_count, "IBS.FidCount") \ + EM(afs_eproto_motd_len, "MotdLen") \ + EM(afs_eproto_offline_msg_len, "OfflineMsgLen") \ + EM(afs_eproto_volname_len, "VolNameLen") \ + EM(afs_eproto_yvl_fsendpt4_len, "YVL.FsEnd4Len") \ + EM(afs_eproto_yvl_fsendpt6_len, "YVL.FsEnd6Len") \ + EM(afs_eproto_yvl_fsendpt_num, "YVL.FsEndCount") \ + EM(afs_eproto_yvl_fsendpt_type, "YVL.FsEndType") \ + EM(afs_eproto_yvl_vlendpt4_len, "YVL.VlEnd4Len") \ + EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \ + E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType") + /* * Export enum symbols via userspace. @@ -555,24 +592,25 @@ TRACE_EVENT(afs_edit_dir, ); TRACE_EVENT(afs_protocol_error, - TP_PROTO(struct afs_call *call, int error, const void *where), + TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause), - TP_ARGS(call, error, where), + TP_ARGS(call, error, cause), TP_STRUCT__entry( - __field(unsigned int, call ) - __field(int, error ) - __field(const void *, where ) + __field(unsigned int, call ) + __field(int, error ) + __field(enum afs_eproto_cause, cause ) ), TP_fast_assign( __entry->call = call ? call->debug_id : 0; __entry->error = error; - __entry->where = where; + __entry->cause = cause; ), - TP_printk("c=%08x r=%d sp=%pSR", - __entry->call, __entry->error, __entry->where) + TP_printk("c=%08x r=%d %s", + __entry->call, __entry->error, + __print_symbolic(__entry->cause, afs_eproto_causes)) ); TRACE_EVENT(afs_cm_no_server, From 12bdcf333fe13ece2a487a699b1a0f4c5dbb594b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:56 +0100 Subject: [PATCH 06/26] afs: Set up the iov_iter before calling afs_extract_data() afs_extract_data sets up a temporary iov_iter and passes it to AF_RXRPC each time it is called to describe the remaining buffer to be filled. Instead: (1) Put an iterator in the afs_call struct. (2) Set the iterator for each marshalling stage to load data into the appropriate places. A number of convenience functions are provided to this end (eg. afs_extract_to_buf()). This iterator is then passed to afs_extract_data(). (3) Use the new ITER_DISCARD iterator to discard any excess data provided by FetchData. Signed-off-by: David Howells --- fs/afs/cmservice.c | 40 +++--- fs/afs/fsclient.c | 280 +++++++++++++------------------------ fs/afs/internal.h | 56 +++++++- fs/afs/rxrpc.c | 41 ++---- fs/afs/vlclient.c | 104 ++++++-------- include/trace/events/afs.h | 22 +-- 6 files changed, 235 insertions(+), 308 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 58f79301a716..4db62ae8dc1a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -176,13 +176,13 @@ static int afs_deliver_cb_callback(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -196,13 +196,12 @@ static int afs_deliver_cb_callback(struct afs_call *call) GFP_KERNEL); if (!call->buffer) return -ENOMEM; - call->offset = 0; + afs_extract_to_buf(call, call->count * 3 * 4); call->unmarshall++; case 2: _debug("extract FID array"); - ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -222,13 +221,13 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->cb.type = AFSCM_CB_UNTYPED; } - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -237,13 +236,12 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (call->count2 != call->count && call->count2 != 0) return afs_protocol_error(call, -EBADMSG, afs_eproto_cb_count); - call->offset = 0; + afs_extract_to_buf(call, call->count2 * 3 * 4); call->unmarshall++; case 4: _debug("extract CB array"); - ret = afs_extract_data(call, call->buffer, - call->count2 * 3 * 4, false); + ret = afs_extract_data(call, false); if (ret < 0) return ret; @@ -256,7 +254,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->cb.type = ntohl(*bp++); } - call->offset = 0; call->unmarshall++; case 5: break; @@ -303,7 +300,8 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; @@ -332,16 +330,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); if (!call->buffer) return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; case 1: _debug("extract UUID"); - ret = afs_extract_data(call, call->buffer, - 11 * sizeof(__be32), false); + ret = afs_extract_data(call, false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -364,7 +361,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) for (loop = 0; loop < 6; loop++) r->node[loop] = ntohl(b[loop + 5]); - call->offset = 0; call->unmarshall++; case 2: @@ -407,7 +403,8 @@ static int afs_deliver_cb_probe(struct afs_call *call) _enter(""); - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; @@ -455,16 +452,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL); if (!call->buffer) return -ENOMEM; + afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; case 1: _debug("extract UUID"); - ret = afs_extract_data(call, call->buffer, - 11 * sizeof(__be32), false); + ret = afs_extract_data(call, false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -487,7 +483,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) for (loop = 0; loop < 6; loop++) r->node[loop] = ntohl(b[loop + 5]); - call->offset = 0; call->unmarshall++; case 2: @@ -572,7 +567,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) _enter(""); - ret = afs_extract_data(call, NULL, 0, false); + afs_extract_discard(call, 0); + ret = afs_extract_data(call, false); if (ret < 0) return ret; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d9a5815945dc..fe2e9e39b388 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -20,12 +20,6 @@ static const struct afs_fid afs_zero_fid; -/* - * We need somewhere to discard into in case the server helpfully returns more - * than we asked for in FS.FetchData{,64}. - */ -static u8 afs_discard_buffer[64]; - static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) { call->cbi = afs_get_cb_interest(cbi); @@ -469,114 +463,93 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) struct afs_read *req = call->reply[2]; const __be32 *bp; unsigned int size; - void *buffer; int ret; - _enter("{%u,%zu/%u;%llu/%llu}", - call->unmarshall, call->offset, call->count, - req->remain, req->actual_len); + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(&call->iter), req->actual_len); switch (call->unmarshall) { case 0: req->actual_len = 0; - call->offset = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); call->unmarshall++; - if (call->operation_ID != FSFETCHDATA64) { - call->unmarshall++; - goto no_msw; + if (call->operation_ID == FSFETCHDATA64) { + afs_extract_to_tmp64(call); + } else { + call->tmp_u = htonl(0); + afs_extract_to_tmp(call); } - /* extract the upper part of the returned data length of an - * FSFETCHDATA64 op (which should always be 0 using this - * client) */ - case 1: - _debug("extract data length (MSW)"); - ret = afs_extract_data(call, &call->tmp, 4, true); - if (ret < 0) - return ret; - - req->actual_len = ntohl(call->tmp); - req->actual_len <<= 32; - call->offset = 0; - call->unmarshall++; - - no_msw: /* extract the returned data length */ - case 2: + case 1: _debug("extract data length"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; - req->actual_len |= ntohl(call->tmp); + req->actual_len = be64_to_cpu(call->tmp64); _debug("DATA length: %llu", req->actual_len); - - req->remain = req->actual_len; - call->offset = req->pos & (PAGE_SIZE - 1); - req->index = 0; - if (req->actual_len == 0) + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) goto no_more_data; + call->unmarshall++; begin_page: ASSERTCMP(req->index, <, req->nr_pages); - if (req->remain > PAGE_SIZE - call->offset) - size = PAGE_SIZE - call->offset; + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; else size = req->remain; - call->count = call->offset + size; - ASSERTCMP(call->count, <=, PAGE_SIZE); - req->remain -= size; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); /* extract the returned data */ - case 3: - _debug("extract data %llu/%llu %zu/%u", - req->remain, req->actual_len, call->offset, call->count); + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(&call->iter), req->remain); - buffer = kmap(req->pages[req->index]); - ret = afs_extract_data(call, buffer, call->count, true); - kunmap(req->pages[req->index]); + ret = afs_extract_data(call, true); if (ret < 0) return ret; - if (call->offset == PAGE_SIZE) { + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; if (req->page_done) req->page_done(call, req); req->index++; - if (req->remain > 0) { - call->offset = 0; - if (req->index >= req->nr_pages) { - call->unmarshall = 4; - goto begin_discard; - } + if (req->remain > 0) goto begin_page; - } } - goto no_more_data; + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; /* Discard any excess data the server gave us */ - begin_discard: - case 4: - size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); - call->count = size; - _debug("extract discard %llu/%llu %zu/%u", - req->remain, req->actual_len, call->offset, call->count); + iov_iter_discard(&call->iter, READ, req->actual_len - req->len); + call->unmarshall = 3; + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(&call->iter), req->actual_len - req->len); - call->offset = 0; - ret = afs_extract_data(call, afs_discard_buffer, call->count, true); - req->remain -= call->offset; + ret = afs_extract_data(call, true); if (ret < 0) return ret; - if (req->remain > 0) - goto begin_discard; no_more_data: - call->offset = 0; - call->unmarshall = 5; + call->unmarshall = 4; + afs_extract_to_buf(call, (21 + 3 + 6) * 4); /* extract the metadata */ - case 5: - ret = afs_extract_data(call, call->buffer, - (21 + 3 + 6) * 4, false); + case 4: + ret = afs_extract_data(call, false); if (ret < 0) return ret; @@ -589,20 +562,19 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (call->reply[1]) xdr_decode_AFSVolSync(&bp, call->reply[1]); - call->offset = 0; call->unmarshall++; - case 6: + case 5: break; } for (; req->index < req->nr_pages; req->index++) { - if (call->count < PAGE_SIZE) + if (req->offset < PAGE_SIZE) zero_user_segment(req->pages[req->index], - call->count, PAGE_SIZE); + req->offset, PAGE_SIZE); if (req->page_done) req->page_done(call, req); - call->count = 0; + req->offset = 0; } _leave(" = 0 [done]"); @@ -1598,31 +1570,31 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; + u32 size; int ret; _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: - call->offset = 0; call->unmarshall++; + afs_extract_to_buf(call, 12 * 4); /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, call->buffer, - 12 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; bp = call->buffer; xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]); - call->offset = 0; call->unmarshall++; + afs_extract_to_tmp(call); /* extract the volume name length */ case 2: - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -1631,46 +1603,26 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (call->count >= AFSNAMEMAX) return afs_protocol_error(call, -EBADMSG, afs_eproto_volname_len); - call->offset = 0; + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the volume name */ case 3: _debug("extract volname"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("volname '%s'", p); - - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; - /* extract the volume name padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_volname_padding; - } - call->count = 4 - (call->count & 3); - - case 4: - ret = afs_extract_data(call, call->buffer, - call->count, true); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - no_volname_padding: - /* extract the offline message length */ - case 5: - ret = afs_extract_data(call, &call->tmp, 4, true); + case 4: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -1679,46 +1631,27 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (call->count >= AFSNAMEMAX) return afs_protocol_error(call, -EBADMSG, afs_eproto_offline_msg_len); - call->offset = 0; + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the offline message */ - case 6: + case 5: _debug("extract offline"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("offline '%s'", p); - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; - /* extract the offline message padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_offline_padding; - } - call->count = 4 - (call->count & 3); - - case 7: - ret = afs_extract_data(call, call->buffer, - call->count, true); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - no_offline_padding: - /* extract the message of the day length */ - case 8: - ret = afs_extract_data(call, &call->tmp, 4, true); + case 6: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -1727,38 +1660,24 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) if (call->count >= AFSNAMEMAX) return afs_protocol_error(call, -EBADMSG, afs_eproto_motd_len); - call->offset = 0; + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); call->unmarshall++; /* extract the message of the day */ - case 9: + case 7: _debug("extract motd"); - if (call->count > 0) { - ret = afs_extract_data(call, call->reply[2], - call->count, true); - if (ret < 0) - return ret; - } + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; p = call->reply[2]; p[call->count] = 0; _debug("motd '%s'", p); - call->offset = 0; call->unmarshall++; - /* extract the message of the day padding */ - call->count = (4 - (call->count & 3)) & 3; - - case 10: - ret = afs_extract_data(call, call->buffer, - call->count, false); - if (ret < 0) - return ret; - - call->offset = 0; - call->unmarshall++; - case 11: + case 8: break; } @@ -2024,19 +1943,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) u32 count; int ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter)); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the capabilities word count */ case 1: - ret = afs_extract_data(call, &call->tmp, - 1 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2044,24 +1960,17 @@ again: call->count = count; call->count2 = count; - call->offset = 0; + iov_iter_discard(&call->iter, READ, count * sizeof(__be32)); call->unmarshall++; /* Extract capabilities words */ case 2: - count = min(call->count, 16U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 16); + ret = afs_extract_data(call, false); if (ret < 0) return ret; /* TODO: Examine capabilities */ - call->count -= count; - if (call->count > 0) - goto again; - call->offset = 0; call->unmarshall++; break; } @@ -2215,13 +2124,13 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the file status count and array in two steps */ case 1: _debug("extract status count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2234,11 +2143,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; more_counts: - call->offset = 0; + afs_extract_to_buf(call, 21 * sizeof(__be32)); case 2: _debug("extract status array %u", call->count); - ret = afs_extract_data(call, call->buffer, 21 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2256,12 +2165,12 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; - call->offset = 0; + afs_extract_to_tmp(call); /* Extract the callback count and array in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2273,11 +2182,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; more_cbs: - call->offset = 0; + afs_extract_to_buf(call, 3 * sizeof(__be32)); case 4: _debug("extract CB array"); - ret = afs_extract_data(call, call->buffer, 3 * 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -2294,11 +2203,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) if (call->count < call->count2) goto more_cbs; - call->offset = 0; + afs_extract_to_buf(call, 6 * sizeof(__be32)); call->unmarshall++; case 5: - ret = afs_extract_data(call, call->buffer, 6 * 4, false); + ret = afs_extract_data(call, false); if (ret < 0) return ret; @@ -2306,7 +2215,6 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) if (call->reply[3]) xdr_decode_AFSVolSync(&bp, call->reply[3]); - call->offset = 0; call->unmarshall++; case 6: diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 73770a2a2311..36e9cc74ac11 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -98,11 +98,16 @@ struct afs_call { struct afs_cb_interest *cbi; /* Callback interest for server used */ void *request; /* request data (first part) */ struct address_space *mapping; /* Pages being written from */ + struct iov_iter iter; /* Buffer iterator */ + struct iov_iter *_iter; /* Iterator currently in use */ + union { /* Convenience for ->iter */ + struct kvec kvec[1]; + struct bio_vec bvec[1]; + }; void *buffer; /* reply receive buffer */ void *reply[4]; /* Where to put the reply */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ - size_t offset; /* offset into received data store */ atomic_t usage; enum afs_call_state state; spinlock_t state_lock; @@ -127,7 +132,13 @@ struct afs_call { unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ - __be32 tmp; /* place to extract temporary data */ + union { /* place to extract temporary data */ + struct { + __be32 tmp_u; + __be32 tmp; + } __attribute__((packed)); + __be64 tmp64; + }; afs_dataversion_t expected_version; /* Updated version expected from store */ afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ }; @@ -185,6 +196,7 @@ struct afs_read { refcount_t usage; unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; + unsigned int offset; /* offset into current page */ void (*page_done)(struct afs_call *, struct afs_read *); struct page **pages; struct page *array[]; @@ -550,6 +562,15 @@ struct afs_vnode { afs_callback_type_t cb_type; /* type of callback */ }; +static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) +{ +#ifdef CONFIG_AFS_FSCACHE + return vnode->cache; +#else + return NULL; +#endif +} + /* * cached security record for one user's attempt to access a vnode */ @@ -930,12 +951,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *, extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, void *, size_t, bool); +extern int afs_extract_data(struct afs_call *, bool); extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); +static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) +{ + call->kvec[0].iov_base = buf; + call->kvec[0].iov_len = size; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, size); +} + +static inline void afs_extract_to_tmp(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp, sizeof(call->tmp)); +} + +static inline void afs_extract_to_tmp64(struct afs_call *call) +{ + afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64)); +} + +static inline void afs_extract_discard(struct afs_call *call, size_t size) +{ + iov_iter_discard(&call->iter, READ, size); +} + +static inline void afs_extract_to_buf(struct afs_call *call, size_t size) +{ + afs_extract_begin(call, call->buffer, size); +} + static inline int afs_transfer_reply(struct afs_call *call) { - return afs_extract_data(call, call->buffer, call->reply_max, false); + return afs_extract_data(call, false); } static inline bool afs_check_call_state(struct afs_call *call, diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 9757c2d953f1..a3904a8315de 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -143,6 +143,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); spin_lock_init(&call->state_lock); + call->_iter = &call->iter; o = atomic_inc_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_alloc, 1, o, @@ -233,6 +234,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net, goto nomem_free; } + afs_extract_to_buf(call, call->reply_max); call->operation_ID = type->op; init_waitqueue_head(&call->waitq); return call; @@ -465,14 +467,12 @@ static void afs_deliver_to_call(struct afs_call *call) state == AFS_CALL_SV_AWAIT_ACK ) { if (state == AFS_CALL_SV_AWAIT_ACK) { - struct iov_iter iter; - - iov_iter_kvec(&iter, READ, NULL, 0, 0); + iov_iter_kvec(&call->iter, READ, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, - call->rxcall, &iter, false, - &remote_abort, + call->rxcall, &call->iter, + false, &remote_abort, &call->service_id); - trace_afs_recv_data(call, 0, 0, false, ret); + trace_afs_receive_data(call, &call->iter, false, ret); if (ret == -EINPROGRESS || ret == -EAGAIN) return; @@ -516,7 +516,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, -EBADMSG, "KUM"); + abort_code, ret, "KUM"); goto local_abort; } } @@ -727,6 +727,7 @@ void afs_charge_preallocation(struct work_struct *work) call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); + afs_extract_to_tmp(call); } if (rxrpc_kernel_charge_accept(net->socket, @@ -772,18 +773,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call) { int ret; - _enter("{%zu}", call->offset); - - ASSERTCMP(call->offset, <, 4); + _enter("{%zu}", iov_iter_count(call->_iter)); /* the operation ID forms the first four bytes of the request data */ - ret = afs_extract_data(call, &call->tmp, 4, true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; call->operation_ID = ntohl(call->tmp); afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST); - call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -887,30 +885,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, void *buf, size_t count, - bool want_more) +int afs_extract_data(struct afs_call *call, bool want_more) { struct afs_net *net = call->net; - struct iov_iter iter; - struct kvec iov; + struct iov_iter *iter = call->_iter; enum afs_call_state state; u32 remote_abort = 0; int ret; - _enter("{%s,%zu},,%zu,%d", - call->type->name, call->offset, count, want_more); + _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more); - ASSERTCMP(call->offset, <=, count); - - iov.iov_base = buf + call->offset; - iov.iov_len = count - call->offset; - iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset); - - ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter, + ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, want_more, &remote_abort, &call->service_id); - call->offset += (count - call->offset) - iov_iter_count(&iter); - trace_afs_recv_data(call, count, call->offset, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index d0f95c4ab05e..e18c51742daa 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -187,19 +187,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) u32 uniquifier, nentries, count; int i, ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_buf(call, + sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32)); call->unmarshall++; /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */ case 1: - ret = afs_extract_data(call, call->buffer, - sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -216,28 +215,28 @@ again: call->reply[0] = alist; call->count = count; call->count2 = nentries; - call->offset = 0; call->unmarshall++; + more_entries: + count = min(call->count, 4U); + afs_extract_to_buf(call, count * sizeof(__be32)); + /* Extract entries */ case 2: - count = min(call->count, 4U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 4); + ret = afs_extract_data(call, call->count > 4); if (ret < 0) return ret; alist = call->reply[0]; bp = call->buffer; + count = min(call->count, 4U); for (i = 0; i < count; i++) if (alist->nr_addrs < call->count2) afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT); call->count -= count; if (call->count > 0) - goto again; - call->offset = 0; + goto more_entries; call->unmarshall++; break; } @@ -318,44 +317,35 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) u32 count; int ret; - _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count); + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_tmp(call); call->unmarshall++; /* Extract the capabilities word count */ case 1: - ret = afs_extract_data(call, &call->tmp, - 1 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; count = ntohl(call->tmp); - call->count = count; call->count2 = count; - call->offset = 0; + call->unmarshall++; + afs_extract_discard(call, count * sizeof(__be32)); /* Extract capabilities words */ case 2: - count = min(call->count, 16U); - ret = afs_extract_data(call, call->buffer, - count * sizeof(__be32), - call->count > 16); + ret = afs_extract_data(call, false); if (ret < 0) return ret; /* TODO: Examine capabilities */ - call->count -= count; - if (call->count > 0) - goto again; - call->offset = 0; call->unmarshall++; break; } @@ -426,22 +416,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) u32 uniquifier, size; int ret; - _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2); + _enter("{%u,%zu,%u}", + call->unmarshall, iov_iter_count(call->_iter), call->count2); -again: switch (call->unmarshall) { case 0: - call->offset = 0; + afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32)); call->unmarshall = 1; /* Extract the returned uuid, uniquifier, fsEndpoints count and * either the first fsEndpoint type or the volEndpoints * count if there are no fsEndpoints. */ case 1: - ret = afs_extract_data(call, call->buffer, - sizeof(uuid_t) + - 3 * sizeof(__be32), - true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -459,15 +446,11 @@ again: return -ENOMEM; alist->version = uniquifier; call->reply[0] = alist; - call->offset = 0; if (call->count == 0) goto extract_volendpoints; - call->unmarshall = 2; - - /* Extract fsEndpoints[] entries */ - case 2: + next_fsendpoint: switch (call->count2) { case YFS_ENDPOINT_IPV4: size = sizeof(__be32) * (1 + 1 + 1); @@ -481,7 +464,12 @@ again: } size += sizeof(__be32); - ret = afs_extract_data(call, call->buffer, size, true); + afs_extract_to_buf(call, size); + call->unmarshall = 2; + + /* Extract fsEndpoints[] entries */ + case 2: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -512,10 +500,9 @@ again: */ call->count2 = ntohl(*bp++); - call->offset = 0; call->count--; if (call->count > 0) - goto again; + goto next_fsendpoint; extract_volendpoints: /* Extract the list of volEndpoints. */ @@ -526,6 +513,7 @@ again: return afs_protocol_error(call, -EBADMSG, afs_eproto_yvl_vlendpt_type); + afs_extract_to_buf(call, 1 * sizeof(__be32)); call->unmarshall = 3; /* Extract the type of volEndpoints[0]. Normally we would @@ -533,17 +521,14 @@ again: * data of the current one, but this is the first... */ case 3: - ret = afs_extract_data(call, call->buffer, sizeof(__be32), true); + ret = afs_extract_data(call, true); if (ret < 0) return ret; bp = call->buffer; - call->count2 = ntohl(*bp++); - call->offset = 0; - call->unmarshall = 4; - /* Extract volEndpoints[] entries */ - case 4: + next_volendpoint: + call->count2 = ntohl(*bp++); switch (call->count2) { case YFS_ENDPOINT_IPV4: size = sizeof(__be32) * (1 + 1 + 1); @@ -557,8 +542,13 @@ again: } if (call->count > 1) - size += sizeof(__be32); - ret = afs_extract_data(call, call->buffer, size, true); + size += sizeof(__be32); /* Get next type too */ + afs_extract_to_buf(call, size); + call->unmarshall = 4; + + /* Extract volEndpoints[] entries */ + case 4: + ret = afs_extract_data(call, true); if (ret < 0) return ret; @@ -584,19 +574,17 @@ again: /* Got either the type of the next entry or the count of * volEndpoints if no more fsEndpoints. */ - call->offset = 0; call->count--; - if (call->count > 0) { - call->count2 = ntohl(*bp++); - goto again; - } + if (call->count > 0) + goto next_volendpoint; end: + afs_extract_discard(call, 0); call->unmarshall = 5; /* Done */ case 5: - ret = afs_extract_data(call, call->buffer, 0, false); + ret = afs_extract_data(call, false); if (ret < 0) return ret; call->unmarshall = 6; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5c60ade2c7d8..5e0f8dcede26 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -207,17 +207,16 @@ afs_edit_dir_reasons; #define EM(a, b) { a, b }, #define E_(a, b) { a, b } -TRACE_EVENT(afs_recv_data, - TP_PROTO(struct afs_call *call, unsigned count, unsigned offset, +TRACE_EVENT(afs_receive_data, + TP_PROTO(struct afs_call *call, struct iov_iter *iter, bool want_more, int ret), - TP_ARGS(call, count, offset, want_more, ret), + TP_ARGS(call, iter, want_more, ret), TP_STRUCT__entry( + __field(loff_t, remain ) __field(unsigned int, call ) __field(enum afs_call_state, state ) - __field(unsigned int, count ) - __field(unsigned int, offset ) __field(unsigned short, unmarshall ) __field(bool, want_more ) __field(int, ret ) @@ -227,17 +226,18 @@ TRACE_EVENT(afs_recv_data, __entry->call = call->debug_id; __entry->state = call->state; __entry->unmarshall = call->unmarshall; - __entry->count = count; - __entry->offset = offset; + __entry->remain = iov_iter_count(iter); __entry->want_more = want_more; __entry->ret = ret; ), - TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d", + TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d", __entry->call, - __entry->state, __entry->unmarshall, - __entry->offset, __entry->count, - __entry->want_more, __entry->ret) + __entry->remain, + __entry->unmarshall, + __entry->want_more, + __entry->state, + __entry->ret) ); TRACE_EVENT(afs_notify_call, From e7f680f45bd1deb4ca479c2348b395e1a4d44b17 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 07/26] afs: Improve FS server rotation error handling Improve the error handling in FS server rotation by: (1) Cache the latest useful error value for the fs operation as a whole in struct afs_fs_cursor separately from the error cached in the afs_addr_cursor struct. The one in the address cursor gets clobbered occasionally. Copy over the error to the fs operation only when it's something we'd be interested in passing to userspace. (2) Make it so that EDESTADDRREQ is the default that is seen only if no addresses are available to be accessed. (3) When calling utility functions, such as checking a volume status or probing a fileserver, don't let a successful result clobber the cached error in the cursor; instead, stash the result in a temporary variable until it has been assessed. (4) Don't return ETIMEDOUT or ETIME if a better error, such as ENETUNREACH, is already cached. (5) On leaving the rotation loop, turn any remote abort code into a more useful error than ECONNABORTED. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells --- fs/afs/addr_list.c | 4 +- fs/afs/internal.h | 1 + fs/afs/rotate.c | 95 ++++++++++++++++++++++++++-------------------- 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 55a756c60746..7b34fad4f8f5 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -318,10 +318,8 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac) if (ac->index == ac->alist->nr_addrs) ac->index = 0; - if (ac->index == ac->start) { - ac->error = -EDESTADDRREQ; + if (ac->index == ac->start) return false; - } } ac->begun = true; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 36e9cc74ac11..81936a4d5035 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -629,6 +629,7 @@ struct afs_fs_cursor { unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ unsigned char start; /* Initial index in server list */ unsigned char index; /* Number of servers tried beyond start */ + short error; unsigned short flags; #define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ #define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 1faef56b12bd..d7cbc3c230ee 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -39,9 +39,10 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode fc->vnode = vnode; fc->key = key; fc->ac.error = SHRT_MAX; + fc->error = -EDESTADDRREQ; if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - fc->ac.error = -EINTR; + fc->error = -EINTR; fc->flags |= AFS_FS_CURSOR_STOP; return false; } @@ -80,7 +81,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, * and have to return an error. */ if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; return false; } @@ -127,7 +128,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) { msleep_interruptible(1000); if (signal_pending(current)) { - fc->ac.error = -ERESTARTSYS; + fc->error = -ERESTARTSYS; return false; } @@ -143,11 +144,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) struct afs_addr_list *alist; struct afs_server *server; struct afs_vnode *vnode = fc->vnode; + int error = fc->ac.error; _enter("%u/%u,%u/%u,%d,%d", fc->index, fc->start, fc->ac.index, fc->ac.start, - fc->ac.error, fc->ac.abort_code); + error, fc->ac.abort_code); if (fc->flags & AFS_FS_CURSOR_STOP) { _leave(" = f [stopped]"); @@ -155,15 +157,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) } /* Evaluate the result of the previous operation, if there was one. */ - switch (fc->ac.error) { + switch (error) { case SHRT_MAX: goto start; case 0: default: /* Success or local failure. Stop. */ + fc->error = error; fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [okay/local %d]", fc->ac.error); + _leave(" = f [okay/local %d]", error); return false; case -ECONNABORTED: @@ -178,7 +181,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * - May indicate that the fileserver couldn't attach to the vol. */ if (fc->flags & AFS_FS_CURSOR_VNOVOL) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto next_server; } @@ -187,12 +190,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) write_unlock(&vnode->volume->servers_lock); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { - fc->ac.error = -ENOMEDIUM; + fc->error = -ENOMEDIUM; goto failed; } @@ -200,7 +203,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * it's the fileserver having trouble. */ if (vnode->volume->servers == fc->server_list) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto next_server; } @@ -215,7 +218,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case VONLINE: case VDISKFULL: case VOVERQUOTA: - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + fc->error = afs_abort_to_error(fc->ac.abort_code); goto next_server; case VOFFLINE: @@ -224,11 +227,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); } if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->ac.error = -EADV; + fc->error = -EADV; goto failed; } if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; goto failed; } goto busy; @@ -240,7 +243,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * have a file lock we need to maintain. */ if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->ac.error = -EBUSY; + fc->error = -EBUSY; goto failed; } if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { @@ -269,16 +272,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * honour, just in case someone sets up a loop. */ if (fc->flags & AFS_FS_CURSOR_VMOVED) { - fc->ac.error = -EREMOTEIO; + fc->error = -EREMOTEIO; goto failed; } fc->flags |= AFS_FS_CURSOR_VMOVED; set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; /* If the server list didn't change, then the VLDB is * out of sync with the fileservers. This is hopefully @@ -290,7 +293,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * TODO: Retry a few times with sleeps. */ if (vnode->volume->servers == fc->server_list) { - fc->ac.error = -ENOMEDIUM; + fc->error = -ENOMEDIUM; goto failed; } @@ -299,20 +302,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) default: clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); + fc->error = afs_abort_to_error(fc->ac.abort_code); goto failed; } + case -ETIMEDOUT: + case -ETIME: + if (fc->error != -EDESTADDRREQ) + goto iterate_address; + /* Fall through */ case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - case -ETIMEDOUT: - case -ETIME: _debug("no conn"); + fc->error = error; goto iterate_address; case -ECONNRESET: _debug("call reset"); + fc->error = error; goto failed; } @@ -328,9 +336,9 @@ start: /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ - fc->ac.error = afs_check_volume_status(vnode->volume, fc->key); - if (fc->ac.error < 0) - goto failed; + error = afs_check_volume_status(vnode->volume, fc->key); + if (error < 0) + goto failed_set_error; if (!afs_start_fs_iteration(fc, vnode)) goto failed; @@ -354,10 +362,10 @@ use_server: * break request before we've finished decoding the reply and * installing the vnode. */ - fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, - fc->index); - if (fc->ac.error < 0) - goto failed; + error = afs_register_server_cb_interest(vnode, fc->server_list, + fc->index); + if (error < 0) + goto failed_set_error; fc->cbi = afs_get_cb_interest(vnode->cb_interest); @@ -422,13 +430,14 @@ next_server: if (fc->flags & AFS_FS_CURSOR_VBUSY) goto restart_from_beginning; - fc->ac.error = -EDESTADDRREQ; goto failed; +failed_set_error: + fc->error = error; failed: fc->flags |= AFS_FS_CURSOR_STOP; afs_end_cursor(&fc->ac); - _leave(" = f [failed %d]", fc->ac.error); + _leave(" = f [failed %d]", fc->error); return false; } @@ -442,13 +451,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) struct afs_vnode *vnode = fc->vnode; struct afs_cb_interest *cbi = vnode->cb_interest; struct afs_addr_list *alist; + int error = fc->ac.error; _enter(""); - switch (fc->ac.error) { + switch (error) { case SHRT_MAX: if (!cbi) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; fc->flags |= AFS_FS_CURSOR_STOP; return false; } @@ -461,7 +471,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) afs_get_addrlist(alist); read_unlock(&cbi->server->fs_lock); if (!alist) { - fc->ac.error = -ESTALE; + fc->error = -ESTALE; fc->flags |= AFS_FS_CURSOR_STOP; return false; } @@ -475,11 +485,13 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) case 0: default: /* Success or local failure. Stop. */ + fc->error = error; fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [okay/local %d]", fc->ac.error); + _leave(" = f [okay/local %d]", error); return false; case -ECONNABORTED: + fc->error = afs_abort_to_error(fc->ac.abort_code); fc->flags |= AFS_FS_CURSOR_STOP; _leave(" = f [abort]"); return false; @@ -490,6 +502,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) case -ETIMEDOUT: case -ETIME: _debug("no conn"); + fc->error = error; goto iterate_address; } @@ -512,7 +525,6 @@ iterate_address: int afs_end_vnode_operation(struct afs_fs_cursor *fc) { struct afs_net *net = afs_v2net(fc->vnode); - int ret; mutex_unlock(&fc->vnode->io_lock); @@ -520,9 +532,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc) afs_put_cb_interest(net, fc->cbi); afs_put_serverlist(net, fc->server_list); - ret = fc->ac.error; - if (ret == -ECONNABORTED) - afs_abort_to_error(fc->ac.abort_code); + if (fc->error == -ECONNABORTED) + fc->error = afs_abort_to_error(fc->ac.abort_code); - return fc->ac.error; + return fc->error; } From 0a5143f2f89cc88d8a3eada8e8ccd86c1e988257 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 08/26] afs: Implement VL server rotation Track VL servers as independent entities rather than lumping all their addresses together into one set and implement server-level rotation by: (1) Add the concept of a VL server list, where each server has its own separate address list. This code is similar to the FS server list. (2) Use the DNS resolver to retrieve a set of servers and their associated addresses, ports, preference and weight ratings. (3) In the case of a legacy DNS resolver or an address list given directly through /proc/net/afs/cells, create a list containing just a dummy server record and attach all the addresses to that. (4) Implement a simple rotation policy, for the moment ignoring the priorities and weights assigned to the servers. (5) Show the address list through /proc/net/afs//vlservers. This also displays the source and status of the data as indicated by the upcall. Signed-off-by: David Howells --- fs/afs/Makefile | 2 + fs/afs/addr_list.c | 165 +++++++++++----------- fs/afs/cell.c | 37 ++--- fs/afs/dynroot.c | 2 +- fs/afs/internal.h | 114 +++++++++++++-- fs/afs/proc.c | 90 +++++++++--- fs/afs/server.c | 42 ++---- fs/afs/vl_list.c | 336 +++++++++++++++++++++++++++++++++++++++++++++ fs/afs/vl_rotate.c | 251 +++++++++++++++++++++++++++++++++ fs/afs/vlclient.c | 32 ++--- fs/afs/volume.c | 52 +++---- 11 files changed, 905 insertions(+), 218 deletions(-) create mode 100644 fs/afs/vl_list.c create mode 100644 fs/afs/vl_rotate.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 546874057bd3..03e9f7afea1b 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -29,6 +29,8 @@ kafs-y := \ super.o \ netdevices.o \ vlclient.o \ + vl_rotate.o \ + vl_list.o \ volume.o \ write.o \ xattr.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 7b34fad4f8f5..3f60b4012587 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, /* * Parse a text string consisting of delimited addresses. */ -struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, - char delim, - unsigned short service, - unsigned short port) +struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net, + const char *text, size_t len, + char delim, + unsigned short service, + unsigned short port) { + struct afs_vlserver_list *vllist; struct afs_addr_list *alist; const char *p, *end = text + len; + const char *problem; unsigned int nr = 0; + int ret = -ENOMEM; _enter("%*.*s,%c", (int)len, (int)len, text, delim); - if (!len) + if (!len) { + _leave(" = -EDESTADDRREQ [empty]"); return ERR_PTR(-EDESTADDRREQ); + } if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len))) delim = ','; @@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, /* Count the addresses */ p = text; do { - if (!*p) - return ERR_PTR(-EINVAL); + if (!*p) { + problem = "nul"; + goto inval; + } if (*p == delim) continue; nr++; if (*p == '[') { p++; - if (p == end) - return ERR_PTR(-EINVAL); + if (p == end) { + problem = "brace1"; + goto inval; + } p = memchr(p, ']', end - p); - if (!p) - return ERR_PTR(-EINVAL); + if (!p) { + problem = "brace2"; + goto inval; + } p++; if (p >= end) break; @@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); - alist = afs_alloc_addrlist(nr, service, port); - if (!alist) + vllist = afs_alloc_vlserver_list(1); + if (!vllist) return ERR_PTR(-ENOMEM); + vllist->nr_servers = 1; + vllist->servers[0].server = afs_alloc_vlserver("", 7, AFS_VL_PORT); + if (!vllist->servers[0].server) + goto error_vl; + + alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT); + if (!alist) + goto error; + /* Extract the addresses */ p = text; do { @@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, break; } - if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) + if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) { family = AF_INET; - else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) + } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) { family = AF_INET6; - else - goto bad_address; - - if (stop != q) + } else { + problem = "family"; goto bad_address; + } p = q; + if (stop != p) { + problem = "nostop"; + goto bad_address; + } + if (q < end && *q == ']') p++; @@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, /* Port number specification "+1234" */ xport = 0; p++; - if (p >= end || !isdigit(*p)) + if (p >= end || !isdigit(*p)) { + problem = "port"; goto bad_address; + } do { xport *= 10; xport += *p - '0'; - if (xport > 65535) + if (xport > 65535) { + problem = "pval"; goto bad_address; + } p++; } while (p < end && isdigit(*p)); } else if (*p == delim) { p++; } else { + problem = "weird"; goto bad_address; } } @@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } while (p < end); + rcu_assign_pointer(vllist->servers[0].server->addresses, alist); _leave(" = [nr %u]", alist->nr_addrs); - return alist; + return vllist; -bad_address: - kfree(alist); +inval: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); return ERR_PTR(-EINVAL); +bad_address: + _leave(" = -EINVAL [%s %zu %*.*s]", + problem, p - text, (int)len, (int)len, text); + ret = -EINVAL; +error: + afs_put_addrlist(alist); +error_vl: + afs_put_vlserverlist(net, vllist); + return ERR_PTR(ret); } /* @@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1, /* * Perform a DNS query for VL servers and build a up an address list. */ -struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) +struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) { - struct afs_addr_list *alist; - char *vllist = NULL; + struct afs_vlserver_list *vllist; + char *result = NULL; int ret; _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, - "", &vllist, _expiry); - if (ret < 0) + ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", + &result, _expiry); + if (ret < 0) { + _leave(" = %d [dns]", ret); return ERR_PTR(ret); - - alist = afs_parse_text_addrs(vllist, strlen(vllist), ',', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - kfree(vllist); - if (alist != ERR_PTR(-ENOMEM)) - pr_err("Failed to parse DNS data\n"); - return alist; } - kfree(vllist); - return alist; + if (*_expiry == 0) + *_expiry = ktime_get_real_seconds() + 60; + + if (ret > 1 && result[0] == 0) + vllist = afs_extract_vlserver_list(cell, result, ret); + else + vllist = afs_parse_text_addrs(cell->net, result, ret, ',', + VL_SERVICE, AFS_VL_PORT); + kfree(result); + if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM)) + pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist)); + + return vllist; } /* @@ -347,43 +392,3 @@ int afs_end_cursor(struct afs_addr_cursor *ac) ac->begun = false; return ac->error; } - -/* - * Set the address cursor for iterating over VL servers. - */ -int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell) -{ - struct afs_addr_list *alist; - int ret; - - if (!rcu_access_pointer(cell->vl_addrs)) { - ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, - TASK_INTERRUPTIBLE); - if (ret < 0) - return ret; - - if (!rcu_access_pointer(cell->vl_addrs) && - ktime_get_real_seconds() < cell->dns_expiry) - return cell->error; - } - - read_lock(&cell->vl_addrs_lock); - alist = rcu_dereference_protected(cell->vl_addrs, - lockdep_is_held(&cell->vl_addrs_lock)); - if (alist->nr_addrs > 0) - afs_get_addrlist(alist); - else - alist = NULL; - read_unlock(&cell->vl_addrs_lock); - - if (!alist) - return -EDESTADDRREQ; - - ac->alist = alist; - ac->addr = NULL; - ac->start = READ_ONCE(alist->index); - ac->index = ac->start; - ac->error = 0; - ac->begun = false; - return 0; -} diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 6127f0fcd62c..963b6fa51fdf 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -119,7 +119,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, */ static struct afs_cell *afs_alloc_cell(struct afs_net *net, const char *name, unsigned int namelen, - const char *vllist) + const char *addresses) { struct afs_cell *cell; int i, ret; @@ -134,7 +134,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, if (namelen == 5 && memcmp(name, "@cell", 5) == 0) return ERR_PTR(-EINVAL); - _enter("%*.*s,%s", namelen, namelen, name, vllist); + _enter("%*.*s,%s", namelen, namelen, name, addresses); cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL); if (!cell) { @@ -153,22 +153,23 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, (1 << AFS_CELL_FL_NO_LOOKUP_YET)); INIT_LIST_HEAD(&cell->proc_volumes); rwlock_init(&cell->proc_lock); - rwlock_init(&cell->vl_addrs_lock); + rwlock_init(&cell->vl_servers_lock); /* Fill in the VL server list if we were given a list of addresses to * use. */ - if (vllist) { - struct afs_addr_list *alist; + if (addresses) { + struct afs_vlserver_list *vllist; - alist = afs_parse_text_addrs(vllist, strlen(vllist), ':', - VL_SERVICE, AFS_VL_PORT); - if (IS_ERR(alist)) { - ret = PTR_ERR(alist); + vllist = afs_parse_text_addrs(net, + addresses, strlen(addresses), ':', + VL_SERVICE, AFS_VL_PORT); + if (IS_ERR(vllist)) { + ret = PTR_ERR(vllist); goto parse_failed; } - rcu_assign_pointer(cell->vl_addrs, alist); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = TIME64_MAX; } @@ -356,14 +357,14 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) */ static void afs_update_cell(struct afs_cell *cell) { - struct afs_addr_list *alist, *old; + struct afs_vlserver_list *vllist, *old; time64_t now, expiry; _enter("%s", cell->name); - alist = afs_dns_query(cell, &expiry); - if (IS_ERR(alist)) { - switch (PTR_ERR(alist)) { + vllist = afs_dns_query(cell, &expiry); + if (IS_ERR(vllist)) { + switch (PTR_ERR(vllist)) { case -ENODATA: /* The DNS said that the cell does not exist */ set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); @@ -387,12 +388,12 @@ static void afs_update_cell(struct afs_cell *cell) /* Exclusion on changing vl_addrs is achieved by a * non-reentrant work item. */ - old = rcu_dereference_protected(cell->vl_addrs, true); - rcu_assign_pointer(cell->vl_addrs, alist); + old = rcu_dereference_protected(cell->vl_servers, true); + rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = expiry; if (old) - afs_put_addrlist(old); + afs_put_vlserverlist(cell->net, old); } if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags)) @@ -414,7 +415,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) ASSERTCMP(atomic_read(&cell->usage), ==, 0); - afs_put_addrlist(rcu_access_pointer(cell->vl_addrs)); + afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); key_put(cell->anonymous_key); kfree(cell); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index f29c6dade7f6..0efed0a63080 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "", NULL, NULL); + ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 81936a4d5035..7e264cb9b4f7 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,8 @@ struct afs_addr_list { unsigned char nr_addrs; unsigned char index; /* Address currently in use */ unsigned char nr_ipv4; /* Number of IPv4 addresses */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; unsigned long probed; /* Mask of servers that have been probed */ unsigned long yfs; /* Mask of servers that are YFS */ struct sockaddr_rxrpc addrs[]; @@ -355,12 +358,51 @@ struct afs_cell { rwlock_t proc_lock; /* VL server list. */ - rwlock_t vl_addrs_lock; /* Lock on vl_addrs */ - struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */ + rwlock_t vl_servers_lock; /* Lock on vl_servers */ + struct afs_vlserver_list __rcu *vl_servers; + u8 name_len; /* Length of name */ char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ }; +/* + * Volume Location server record. + */ +struct afs_vlserver { + struct rcu_head rcu; + struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */ + unsigned long flags; +#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ +#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ + rwlock_t lock; /* Lock on addresses */ + atomic_t usage; + u16 name_len; /* Length of name */ + u16 port; + char name[]; /* Server name, case-flattened */ +}; + +/* + * Weighted list of Volume Location servers. + */ +struct afs_vlserver_entry { + u16 priority; /* Preference (as SRV) */ + u16 weight; /* Weight (as SRV) */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + struct afs_vlserver *server; +}; + +struct afs_vlserver_list { + struct rcu_head rcu; + atomic_t usage; + u8 nr_servers; + u8 index; /* Server currently in use */ + enum dns_record_source source:8; + enum dns_lookup_status status:8; + rwlock_t lock; + struct afs_vlserver_entry servers[]; +}; + /* * Cached VLDB entry. * @@ -616,6 +658,23 @@ struct afs_addr_cursor { bool responded; /* T if the current address responded */ }; +/* + * Cursor for iterating over a set of volume location servers. + */ +struct afs_vl_cursor { + struct afs_addr_cursor ac; + struct afs_cell *cell; /* The cell we're querying */ + struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ + struct key *key; /* Key for the server */ + unsigned char start; /* Initial index in server list */ + unsigned char index; /* Number of servers tried beyond start */ + short error; + unsigned short flags; +#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ +#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ +#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */ +}; + /* * Cursor for iterating over a set of fileservers. */ @@ -662,12 +721,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int, unsigned short, unsigned short); extern void afs_put_addrlist(struct afs_addr_list *); -extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char, - unsigned short, unsigned short); -extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *); +extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *, + const char *, size_t, char, + unsigned short, unsigned short); +extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *); extern bool afs_iterate_addresses(struct afs_addr_cursor *); extern int afs_end_cursor(struct afs_addr_cursor *); -extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *); extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16); extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); @@ -1088,14 +1147,43 @@ extern void afs_fs_exit(void); /* * vlclient.c */ -extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *, - struct afs_addr_cursor *, - struct key *, const char *, int); -extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); +extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, + const char *, int); +extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); -extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *, - struct key *, const uuid_t *); +extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); + +/* + * vl_rotate.c + */ +extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *, + struct afs_cell *, struct key *); +extern bool afs_select_vlserver(struct afs_vl_cursor *); +extern bool afs_select_current_vlserver(struct afs_vl_cursor *); +extern int afs_end_vlserver_operation(struct afs_vl_cursor *); + +/* + * vlserver_list.c + */ +static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver) +{ + atomic_inc(&vlserver->usage); + return vlserver; +} + +static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist) +{ + if (vllist) + atomic_inc(&vllist->usage); + return vllist; +} + +extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short); +extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *); +extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int); +extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *); +extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, + const void *, size_t); /* * volume.c diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 9101f62707af..6585f4bec0d3 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -17,6 +17,11 @@ #include #include "internal.h" +struct afs_vl_seq_net_private { + struct seq_net_private seq; /* Must be first */ + struct afs_vlserver_list *vllist; +}; + static inline struct afs_net *afs_seq2net(struct seq_file *m) { return afs_net(seq_file_net(m)); @@ -247,61 +252,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = { .show = afs_proc_cell_volumes_show, }; +static const char *const dns_record_sources[NR__dns_record_source + 1] = { + [DNS_RECORD_UNAVAILABLE] = "unav", + [DNS_RECORD_FROM_CONFIG] = "cfg", + [DNS_RECORD_FROM_DNS_A] = "A", + [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB", + [DNS_RECORD_FROM_DNS_SRV] = "SRV", + [DNS_RECORD_FROM_NSS] = "nss", + [NR__dns_record_source] = "[weird]" +}; + +static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = { + [DNS_LOOKUP_NOT_DONE] = "no-lookup", + [DNS_LOOKUP_GOOD] = "good", + [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad", + [DNS_LOOKUP_BAD] = "bad", + [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found", + [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure", + [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure", + [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure", + [NR__dns_lookup_status] = "[weird]" +}; + /* * Display the list of Volume Location servers we're using for a cell. */ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct sockaddr_rxrpc *addr = v; + const struct afs_vl_seq_net_private *priv = m->private; + const struct afs_vlserver_list *vllist = priv->vllist; + const struct afs_vlserver_entry *entry; + const struct afs_vlserver *vlserver; + const struct afs_addr_list *alist; + int i; - /* display header on line 1 */ - if (v == (void *)1) { - seq_puts(m, "ADDRESS\n"); + if (v == SEQ_START_TOKEN) { + seq_printf(m, "# source %s, status %s\n", + dns_record_sources[vllist->source], + dns_lookup_statuses[vllist->status]); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%pISp\n", &addr->transport); + entry = v; + vlserver = entry->server; + alist = rcu_dereference(vlserver->addresses); + + seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n", + vlserver->name, entry->priority, entry->weight, + dns_record_sources[alist ? alist->source : entry->source], + dns_lookup_statuses[alist ? alist->status : entry->status]); + if (alist) { + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " %c %pISpc\n", + alist->index == i ? '>' : '-', + &alist->addrs[i].transport); + } return 0; } static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_addr_list *alist; + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist; struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); - alist = rcu_dereference(cell->vl_addrs); + vllist = rcu_dereference(cell->vl_servers); + priv->vllist = vllist; - /* allow for the header line */ - if (!pos) - return (void *) 1; - pos--; + if (pos < 0) + *_pos = pos = 0; + if (pos == 0) + return SEQ_START_TOKEN; - if (!alist || pos >= alist->nr_addrs) + if (!vllist || pos - 1 >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos - 1]; } static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_addr_list *alist; - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_vl_seq_net_private *priv = m->private; + struct afs_vlserver_list *vllist = priv->vllist; loff_t pos; - alist = rcu_dereference(cell->vl_addrs); - pos = *_pos; - (*_pos)++; - if (!alist || pos >= alist->nr_addrs) + pos++; + *_pos = pos; + if (!vllist || pos - 1 >= vllist->nr_servers) return NULL; - return alist->addrs + pos; + return &vllist->servers[pos - 1]; } static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) @@ -562,7 +608,7 @@ int afs_proc_cell_setup(struct afs_cell *cell) if (!proc_create_net_data("vlservers", 0444, dir, &afs_proc_cell_vlservers_ops, - sizeof(struct seq_net_private), + sizeof(struct afs_vl_seq_net_private), cell) || !proc_create_net_data("volumes", 0444, dir, &afs_proc_cell_volumes_ops, diff --git a/fs/afs/server.c b/fs/afs/server.c index 1d329e6981d5..6102ea9ee3fb 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -246,41 +246,23 @@ enomem: static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, struct key *key, const uuid_t *uuid) { - struct afs_addr_cursor ac; - struct afs_addr_list *alist; + struct afs_vl_cursor vc; + struct afs_addr_list *alist = NULL; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); - - while (afs_iterate_addresses(&ac)) { - if (test_bit(ac.index, &ac.alist->yfs)) - alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); - else - alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return alist; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; + ret = -ERESTARTSYS; + if (afs_begin_vlserver_operation(&vc, cell, key)) { + while (afs_select_vlserver(&vc)) { + if (test_bit(vc.ac.index, &vc.ac.alist->yfs)) + alist = afs_yfsvl_get_endpoints(&vc, uuid); + else + alist = afs_vl_get_addrs_u(&vc, uuid); } + + ret = afs_end_vlserver_operation(&vc); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + return ret < 0 ? ERR_PTR(ret) : alist; } /* diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c new file mode 100644 index 000000000000..c1e316ba105a --- /dev/null +++ b/fs/afs/vl_list.c @@ -0,0 +1,336 @@ +/* AFS vlserver list management. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include "internal.h" + +struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, + unsigned short port) +{ + struct afs_vlserver *vlserver; + + vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), + GFP_KERNEL); + if (vlserver) { + atomic_set(&vlserver->usage, 1); + rwlock_init(&vlserver->lock); + vlserver->name_len = name_len; + vlserver->port = port; + memcpy(vlserver->name, name, name_len); + } + return vlserver; +} + +static void afs_vlserver_rcu(struct rcu_head *rcu) +{ + struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu); + + afs_put_addrlist(rcu_access_pointer(vlserver->addresses)); + kfree_rcu(vlserver, rcu); +} + +void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) +{ + if (vlserver) { + unsigned int u = atomic_dec_return(&vlserver->usage); + //_debug("VL PUT %p{%u}", vlserver, u); + + if (u == 0) + call_rcu(&vlserver->rcu, afs_vlserver_rcu); + } +} + +struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) +{ + struct afs_vlserver_list *vllist; + + vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); + if (vllist) { + atomic_set(&vllist->usage, 1); + rwlock_init(&vllist->lock); + } + + return vllist; +} + +void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) +{ + if (vllist) { + unsigned int u = atomic_dec_return(&vllist->usage); + + //_debug("VLLS PUT %p{%u}", vllist, u); + if (u == 0) { + int i; + + for (i = 0; i < vllist->nr_servers; i++) { + afs_put_vlserver(net, vllist->servers[i].server); + } + kfree_rcu(vllist, rcu); + } + } +} + +static u16 afs_extract_le16(const u8 **_b) +{ + u16 val; + + val = (u16)*(*_b)++ << 0; + val |= (u16)*(*_b)++ << 8; + return val; +} + +/* + * Build a VL server address list from a DNS queried server list. + */ +static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, + u8 nr_addrs, u16 port) +{ + struct afs_addr_list *alist; + const u8 *b = *_b; + int ret = -EINVAL; + + alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port); + if (!alist) + return ERR_PTR(-ENOMEM); + if (nr_addrs == 0) + return alist; + + for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) { + struct dns_server_list_v1_address hdr; + __be32 x[4]; + + hdr.address_type = *b++; + + switch (hdr.address_type) { + case DNS_ADDRESS_IS_IPV4: + if (end - b < 4) { + _leave(" = -EINVAL [short inet]"); + goto error; + } + memcpy(x, b, 4); + afs_merge_fs_addr4(alist, x[0], port); + b += 4; + break; + + case DNS_ADDRESS_IS_IPV6: + if (end - b < 16) { + _leave(" = -EINVAL [short inet6]"); + goto error; + } + memcpy(x, b, 16); + afs_merge_fs_addr6(alist, x, port); + b += 16; + break; + + default: + _leave(" = -EADDRNOTAVAIL [unknown af %u]", + hdr.address_type); + ret = -EADDRNOTAVAIL; + goto error; + } + } + + /* Start with IPv6 if available. */ + if (alist->nr_ipv4 < alist->nr_addrs) + alist->index = alist->nr_ipv4; + + *_b = b; + return alist; + +error: + *_b = b; + afs_put_addrlist(alist); + return ERR_PTR(ret); +} + +/* + * Build a VL server list from a DNS queried server list. + */ +struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, + const void *buffer, + size_t buffer_size) +{ + const struct dns_server_list_v1_header *hdr = buffer; + struct dns_server_list_v1_server bs; + struct afs_vlserver_list *vllist, *previous; + struct afs_addr_list *addrs; + struct afs_vlserver *server; + const u8 *b = buffer, *end = buffer + buffer_size; + int ret = -ENOMEM, nr_servers, i, j; + + _enter(""); + + /* Check that it's a server list, v1 */ + if (end - b < sizeof(*hdr) || + hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST || + hdr->hdr.version != 1) { + pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n", + hdr->hdr.content, hdr->hdr.version, end - b); + ret = -EDESTADDRREQ; + goto dump; + } + + nr_servers = hdr->nr_servers; + + vllist = afs_alloc_vlserver_list(nr_servers); + if (!vllist) + return ERR_PTR(-ENOMEM); + + vllist->source = (hdr->source < NR__dns_record_source) ? + hdr->source : NR__dns_record_source; + vllist->status = (hdr->status < NR__dns_lookup_status) ? + hdr->status : NR__dns_lookup_status; + + read_lock(&cell->vl_servers_lock); + previous = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + + b += sizeof(*hdr); + while (end - b >= sizeof(bs)) { + bs.name_len = afs_extract_le16(&b); + bs.priority = afs_extract_le16(&b); + bs.weight = afs_extract_le16(&b); + bs.port = afs_extract_le16(&b); + bs.source = *b++; + bs.status = *b++; + bs.protocol = *b++; + bs.nr_addrs = *b++; + + _debug("extract %u %u %u %u %u %u %*.*s", + bs.name_len, bs.priority, bs.weight, + bs.port, bs.protocol, bs.nr_addrs, + bs.name_len, bs.name_len, b); + + if (end - b < bs.name_len) + break; + + ret = -EPROTONOSUPPORT; + if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) { + bs.protocol = DNS_SERVER_PROTOCOL_UDP; + } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) { + _leave(" = [proto %u]", bs.protocol); + goto error; + } + + if (bs.port == 0) + bs.port = AFS_VL_PORT; + if (bs.source > NR__dns_record_source) + bs.source = NR__dns_record_source; + if (bs.status > NR__dns_lookup_status) + bs.status = NR__dns_lookup_status; + + server = NULL; + if (previous) { + /* See if we can update an old server record */ + for (i = 0; i < previous->nr_servers; i++) { + struct afs_vlserver *p = previous->servers[i].server; + + if (p->name_len == bs.name_len && + p->port == bs.port && + strncasecmp(b, p->name, bs.name_len) == 0) { + server = afs_get_vlserver(p); + break; + } + } + } + + if (!server) { + ret = -ENOMEM; + server = afs_alloc_vlserver(b, bs.name_len, bs.port); + if (!server) + goto error; + } + + b += bs.name_len; + + /* Extract the addresses - note that we can't skip this as we + * have to advance the payload pointer. + */ + addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port); + if (IS_ERR(addrs)) { + ret = PTR_ERR(addrs); + goto error_2; + } + + if (vllist->nr_servers >= nr_servers) { + _debug("skip %u >= %u", vllist->nr_servers, nr_servers); + afs_put_addrlist(addrs); + afs_put_vlserver(cell->net, server); + continue; + } + + addrs->source = bs.source; + addrs->status = bs.status; + + if (addrs->nr_addrs == 0) { + afs_put_addrlist(addrs); + if (!rcu_access_pointer(server->addresses)) { + afs_put_vlserver(cell->net, server); + continue; + } + } else { + struct afs_addr_list *old = addrs; + + write_lock(&server->lock); + rcu_swap_protected(server->addresses, old, + lockdep_is_held(&server->lock)); + write_unlock(&server->lock); + afs_put_addrlist(old); + } + + + /* TODO: Might want to check for duplicates */ + + /* Insertion-sort by priority and weight */ + for (j = 0; j < vllist->nr_servers; j++) { + if (bs.priority < vllist->servers[j].priority) + break; /* Lower preferable */ + if (bs.priority == vllist->servers[j].priority && + bs.weight > vllist->servers[j].weight) + break; /* Higher preferable */ + } + + if (j < vllist->nr_servers) { + memmove(vllist->servers + j + 1, + vllist->servers + j, + (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); + } + + vllist->servers[j].priority = bs.priority; + vllist->servers[j].weight = bs.weight; + vllist->servers[j].server = server; + vllist->nr_servers++; + } + + if (b != end) { + _debug("parse error %zd", b - end); + goto error; + } + + afs_put_vlserverlist(cell->net, previous); + _leave(" = ok [%u]", vllist->nr_servers); + return vllist; + +error_2: + afs_put_vlserver(cell->net, server); +error: + afs_put_vlserverlist(cell->net, vllist); + afs_put_vlserverlist(cell->net, previous); +dump: + if (ret != -ENOMEM) { + printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer); + print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size); + } + return ERR_PTR(ret); +} diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c new file mode 100644 index 000000000000..44a936ad9c7a --- /dev/null +++ b/fs/afs/vl_rotate.c @@ -0,0 +1,251 @@ +/* Handle vlserver selection and rotation. + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include "internal.h" +#include "afs_vl.h" + +/* + * Begin an operation on a volume location server. + */ +bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, + struct key *key) +{ + memset(vc, 0, sizeof(*vc)); + vc->cell = cell; + vc->key = key; + vc->error = -EDESTADDRREQ; + vc->ac.error = SHRT_MAX; + + if (signal_pending(current)) { + vc->error = -EINTR; + vc->flags |= AFS_VL_CURSOR_STOP; + return false; + } + + return true; +} + +/* + * Begin iteration through a server list, starting with the last used server if + * possible, or the last recorded good server if not. + */ +static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) +{ + struct afs_cell *cell = vc->cell; + + if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, + TASK_INTERRUPTIBLE)) { + vc->error = -ERESTARTSYS; + return false; + } + + read_lock(&cell->vl_servers_lock); + vc->server_list = afs_get_vlserverlist( + rcu_dereference_protected(cell->vl_servers, + lockdep_is_held(&cell->vl_servers_lock))); + read_unlock(&cell->vl_servers_lock); + if (!vc->server_list || !vc->server_list->nr_servers) + return false; + + vc->start = READ_ONCE(vc->server_list->index); + vc->index = vc->start; + return true; +} + +/* + * Select the vlserver to use. May be called multiple times to rotate + * through the vlservers. + */ +bool afs_select_vlserver(struct afs_vl_cursor *vc) +{ + struct afs_addr_list *alist; + struct afs_vlserver *vlserver; + int error = vc->ac.error; + + _enter("%u/%u,%u/%u,%d,%d", + vc->index, vc->start, + vc->ac.index, vc->ac.start, + error, vc->ac.abort_code); + + if (vc->flags & AFS_VL_CURSOR_STOP) { + _leave(" = f [stopped]"); + return false; + } + + /* Evaluate the result of the previous operation, if there was one. */ + switch (error) { + case SHRT_MAX: + goto start; + + default: + case 0: + /* Success or local failure. Stop. */ + vc->error = error; + vc->flags |= AFS_VL_CURSOR_STOP; + _leave(" = f [okay/local %d]", vc->ac.error); + return false; + + case -ECONNABORTED: + /* The far side rejected the operation on some grounds. This + * might involve the server being busy or the volume having been moved. + */ + switch (vc->ac.abort_code) { + case AFSVL_IO: + case AFSVL_BADVOLOPER: + case AFSVL_NOMEM: + /* The server went weird. */ + vc->error = -EREMOTEIO; + //write_lock(&vc->cell->vl_servers_lock); + //vc->server_list->weird_mask |= 1 << vc->index; + //write_unlock(&vc->cell->vl_servers_lock); + goto next_server; + + default: + vc->error = afs_abort_to_error(vc->ac.abort_code); + goto failed; + } + + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + _debug("no conn %d", error); + vc->error = error; + goto iterate_address; + + case -ECONNRESET: + _debug("call reset"); + vc->error = error; + vc->flags |= AFS_VL_CURSOR_RETRY; + goto next_server; + } + +restart_from_beginning: + _debug("restart"); + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(vc->cell->net, vc->server_list); + vc->server_list = NULL; + if (vc->flags & AFS_VL_CURSOR_RETRIED) + goto failed; + vc->flags |= AFS_VL_CURSOR_RETRIED; +start: + _debug("start"); + + /* TODO: Consider checking the VL server list */ + + if (!afs_start_vl_iteration(vc)) + goto failed; + +use_server: + _debug("use"); + /* We're starting on a different vlserver from the list. We need to + * check it, find its address list and probe its capabilities before we + * use it. + */ + ASSERTCMP(vc->ac.alist, ==, NULL); + vlserver = vc->server_list->servers[vc->index].server; + + // TODO: Check the vlserver occasionally + //if (!afs_check_vlserver_record(vc, vlserver)) + // goto failed; + + _debug("USING VLSERVER: %s", vlserver->name); + + read_lock(&vlserver->lock); + alist = rcu_dereference_protected(vlserver->addresses, + lockdep_is_held(&vlserver->lock)); + afs_get_addrlist(alist); + read_unlock(&vlserver->lock); + + memset(&vc->ac, 0, sizeof(vc->ac)); + + /* Probe the current vlserver if we haven't done so yet. */ +#if 0 // TODO + if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) { + vc->ac.alist = afs_get_addrlist(alist); + + if (!afs_probe_vlserver(vc)) { + error = vc->ac.error; + switch (error) { + case -ENOMEM: + case -ERESTARTSYS: + case -EINTR: + goto failed_set_error; + default: + goto next_server; + } + } + } +#endif + + if (!vc->ac.alist) + vc->ac.alist = alist; + else + afs_put_addrlist(alist); + + vc->ac.start = READ_ONCE(alist->index); + vc->ac.index = vc->ac.start; + +iterate_address: + ASSERT(vc->ac.alist); + _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + /* Iterate over the current server's address list to try and find an + * address on which it will respond to us. + */ + if (!afs_iterate_addresses(&vc->ac)) + goto next_server; + + _leave(" = t %pISpc", &vc->ac.addr->transport); + return true; + +next_server: + _debug("next"); + afs_end_cursor(&vc->ac); + vc->index++; + if (vc->index >= vc->server_list->nr_servers) + vc->index = 0; + if (vc->index != vc->start) + goto use_server; + + /* That's all the servers poked to no good effect. Try again if some + * of them were busy. + */ + if (vc->flags & AFS_VL_CURSOR_RETRY) + goto restart_from_beginning; + + goto failed; + +failed: + vc->flags |= AFS_VL_CURSOR_STOP; + afs_end_cursor(&vc->ac); + _leave(" = f [failed %d]", vc->error); + return false; +} + +/* + * Tidy up a volume location server cursor and unlock the vnode. + */ +int afs_end_vlserver_operation(struct afs_vl_cursor *vc) +{ + struct afs_net *net = vc->cell->net; + + afs_end_cursor(&vc->ac); + afs_put_vlserverlist(net, vc->server_list); + + if (vc->error == -ECONNABORTED) + vc->error = afs_abort_to_error(vc->ac.abort_code); + + return vc->error; +} diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index e18c51742daa..3127ab9b5521 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = { * Dispatch a get volume entry by name or ID operation (uuid variant). If the * volname is a decimal number then it's a volume ID not a volume name. */ -struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc, const char *volname, int volnamesz) { struct afs_vldb_entry *entry; struct afs_call *call; + struct afs_net *net = vc->cell->net; size_t reqsz, padsz; __be32 *bp; @@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, return ERR_PTR(-ENOMEM); } - call->key = key; + call->key = vc->key; call->reply[0] = entry; call->ret_reply0 = true; @@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net, memset((void *)bp + volnamesz, 0, padsz); trace_afs_make_vl_call(call); - return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -266,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_ListAddrByAttributes__xdr *r; const struct afs_uuid *u = (const struct afs_uuid *)uuid; struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; int i; @@ -285,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -306,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net, r->uuid.node[i] = htonl(u->node[i]); trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } /* @@ -367,14 +365,13 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { }; /* - * Probe a fileserver for the capabilities that it supports. This can + * Probe a volume server for the capabilities that it supports. This can * return up to 196 words. * * We use this to probe for service upgrade to determine what the server at the * other end supports. */ -int afs_vl_get_capabilities(struct afs_net *net, - struct afs_addr_cursor *ac, +int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, struct key *key) { struct afs_call *call; @@ -617,12 +614,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = { * Dispatch an operation to get the addresses for a server, where the server is * nominated by UUID. */ -struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, - struct afs_addr_cursor *ac, - struct key *key, +struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, const uuid_t *uuid) { struct afs_call *call; + struct afs_net *net = vc->cell->net; __be32 *bp; _enter(""); @@ -633,7 +629,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, if (!call) return ERR_PTR(-ENOMEM); - call->key = key; + call->key = vc->key; call->reply[0] = NULL; call->ret_reply0 = true; @@ -644,5 +640,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net, memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */ trace_afs_make_vl_call(call); - return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false); + return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false); } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 3037bd01f617..1cd263fa6028 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -74,55 +74,35 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, const char *volname, size_t volnamesz) { - struct afs_addr_cursor ac; - struct afs_vldb_entry *vldb; + struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); + struct afs_vl_cursor vc; int ret; - ret = afs_set_vl_cursor(&ac, cell); - if (ret < 0) - return ERR_PTR(ret); + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); - while (afs_iterate_addresses(&ac)) { - if (!test_bit(ac.index, &ac.alist->probed)) { - ret = afs_vl_get_capabilities(cell->net, &ac, key); + while (afs_select_vlserver(&vc)) { + if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) { + ret = afs_vl_get_capabilities(cell->net, &vc.ac, key); switch (ret) { case VL_SERVICE: - clear_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; + clear_bit(vc.ac.index, &vc.ac.alist->yfs); + set_bit(vc.ac.index, &vc.ac.alist->probed); + vc.ac.addr->srx_service = ret; break; case YFS_VL_SERVICE: - set_bit(ac.index, &ac.alist->yfs); - set_bit(ac.index, &ac.alist->probed); - ac.addr->srx_service = ret; + set_bit(vc.ac.index, &vc.ac.alist->yfs); + set_bit(vc.ac.index, &vc.ac.alist->probed); + vc.ac.addr->srx_service = ret; break; } } - vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key, - volname, volnamesz); - switch (ac.error) { - case 0: - afs_end_cursor(&ac); - return vldb; - case -ECONNABORTED: - ac.error = afs_abort_to_error(ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - break; - default: - ac.error = -EIO; - goto error; - } + vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); } -error: - return ERR_PTR(afs_end_cursor(&ac)); + ret = afs_end_vlserver_operation(&vc); + return ret < 0 ? ERR_PTR(ret) : vldb; } /* From ded2f4c58ac24083c536aa7d2ff2b73752a88612 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 09/26] afs: Fix TTL on VL server and address lists Currently the TTL on VL server and address lists isn't set in all circumstances and may be set to poor choices in others, since the TTL is derived from the SRV/AFSDB DNS record if and when available. Fix the TTL by limiting the range to a minimum and maximum from the current time. At some point these can be made into sysctl knobs. Further, use the TTL we obtained from the upcall to set the expiry on negative results too; in future a mechanism can be added to force reloading of such data. Signed-off-by: David Howells --- fs/afs/cell.c | 26 ++++++++++++++++++++++---- fs/afs/proc.c | 14 +++++++++++--- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 963b6fa51fdf..cf445dbd5f2e 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -20,6 +20,8 @@ #include "internal.h" static unsigned __read_mostly afs_cell_gc_delay = 10; +static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; +static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; static void afs_manage_cell(struct work_struct *); @@ -171,6 +173,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, rcu_assign_pointer(cell->vl_servers, vllist); cell->dns_expiry = TIME64_MAX; + } else { + cell->dns_expiry = ktime_get_real_seconds(); } _leave(" = %p", cell); @@ -358,25 +362,39 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) static void afs_update_cell(struct afs_cell *cell) { struct afs_vlserver_list *vllist, *old; - time64_t now, expiry; + unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl); + unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl); + time64_t now, expiry = 0; _enter("%s", cell->name); vllist = afs_dns_query(cell, &expiry); + + now = ktime_get_real_seconds(); + if (min_ttl > max_ttl) + max_ttl = min_ttl; + if (expiry < now + min_ttl) + expiry = now + min_ttl; + else if (expiry > now + max_ttl) + expiry = now + max_ttl; + if (IS_ERR(vllist)) { switch (PTR_ERR(vllist)) { case -ENODATA: - /* The DNS said that the cell does not exist */ + case -EDESTADDRREQ: + /* The DNS said that the cell does not exist or there + * weren't any addresses to be had. + */ set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags); clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 61; + cell->dns_expiry = expiry; break; case -EAGAIN: case -ECONNREFUSED: default: set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags); - cell->dns_expiry = ktime_get_real_seconds() + 10; + cell->dns_expiry = now + 10; break; } diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 6585f4bec0d3..fc36c41641ab 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -37,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m) */ static int afs_proc_cells_show(struct seq_file *m, void *v) { - struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_vlserver_list *vllist; + struct afs_cell *cell; if (v == SEQ_START_TOKEN) { /* display header on line 1 */ - seq_puts(m, "USE NAME\n"); + seq_puts(m, "USE TTL SV NAME\n"); return 0; } + cell = list_entry(v, struct afs_cell, proc_link); + vllist = rcu_dereference(cell->vl_servers); + /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); + seq_printf(m, "%3u %6lld %2u %s\n", + atomic_read(&cell->usage), + cell->dns_expiry - ktime_get_real_seconds(), + vllist ? vllist->nr_servers : 0, + cell->name); return 0; } From 4ac15ea53622272c01954461b4814892b7481b40 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 10/26] afs: Handle EIO from delivery function Fix afs_deliver_to_call() to handle -EIO being returned by the operation delivery function, indicating that the call found itself in the wrong state, by printing an error and aborting the call. Currently, an assertion failure will occur. This can happen, say, if the delivery function falls off the end without calling afs_extract_data() with the want_more parameter set to false to collect the end of the Rx phase of a call. The assertion failure looks like: AFS: Assertion failed 4 == 7 is false 0x4 == 0x7 is false ------------[ cut here ]------------ kernel BUG at fs/afs/rxrpc.c:462! and is matched in the trace buffer by a line like: kworker/7:3-3226 [007] ...1 85158.030203: afs_io_error: c=0003be0c r=-5 CM_REPLY Fixes: 98bf40cd99fc ("afs: Protect call->state changes against signals") Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rxrpc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a3904a8315de..947ae3ab389b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -499,7 +499,6 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; - case -EIO: case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); goto done; @@ -508,6 +507,10 @@ static void afs_deliver_to_call(struct afs_call *call) rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KIV"); goto local_abort; + case -EIO: + pr_err("kAFS: Call %u in bad state %u\n", + call->debug_id, state); + /* Fall through */ case -ENODATA: case -EBADMSG: case -EMSGSIZE: From f51375cd9e1ad75e9e38186aa0d3749ade7d52a5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 11/26] afs: Add a couple of tracepoints to log I/O errors Add a couple of tracepoints to log the production of I/O errors within the AFS filesystem. Signed-off-by: David Howells --- fs/afs/cmservice.c | 10 ++--- fs/afs/dir.c | 18 +++++---- fs/afs/internal.h | 11 ++++++ fs/afs/mntpt.c | 5 ++- fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 2 +- fs/afs/write.c | 1 + include/trace/events/afs.h | 81 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 114 insertions(+), 16 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4db62ae8dc1a..186f621f8722 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -260,7 +260,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ @@ -368,7 +368,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ @@ -409,7 +409,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) return ret; if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); return afs_queue_call_work(call); } @@ -490,7 +490,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) } if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); return afs_queue_call_work(call); } @@ -573,7 +573,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) return ret; if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) - return -EIO; + return afs_io_error(call, afs_io_error_cm_reply); return afs_queue_call_work(call); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 855bf2b79fed..78f9754fd03d 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, ntohs(dbuf->blocks[tmp].hdr.magic)); trace_afs_dir_check_failed(dvnode, off, i_size); kunmap(page); + trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); goto error; } @@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) retry: i_size = i_size_read(&dvnode->vfs_inode); if (i_size < 2048) - return ERR_PTR(-EIO); - if (i_size > 2048 * 1024) + return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small)); + if (i_size > 2048 * 1024) { + trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); return ERR_PTR(-EFBIG); + } _enter("%llu", i_size); @@ -315,7 +318,8 @@ content_has_grown: /* * deal with one block in an AFS directory */ -static int afs_dir_iterate_block(struct dir_context *ctx, +static int afs_dir_iterate_block(struct afs_vnode *dvnode, + struct dir_context *ctx, union afs_xdr_dir_block *block, unsigned blkoff) { @@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, " (len %u/%zu)", blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); - return -EIO; + return afs_bad(dvnode, afs_file_error_dir_over_end); } if (!(block->hdr.bitmap[next / 8] & (1 << (next % 8)))) { @@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx, " %u unmarked extension (len %u/%zu)", blkoff / sizeof(union afs_xdr_dir_block), offset, next, tmp, nlen); - return -EIO; + return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } _debug("ENT[%zu.%u]: ext %u/%zu", @@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, */ page = req->pages[blkoff / PAGE_SIZE]; if (!page) { - ret = -EIO; + ret = afs_bad(dvnode, afs_file_error_dir_missing_page); break; } mark_page_accessed(page); @@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, do { dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / sizeof(union afs_xdr_dir_block)]; - ret = afs_dir_iterate_block(ctx, dblock, blkoff); + ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff); if (ret != 1) { kunmap(page); goto out; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7e264cb9b4f7..28d08aac515d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1257,6 +1257,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, } } +static inline int afs_io_error(struct afs_call *call, enum afs_io_error where) +{ + trace_afs_io_error(call->debug_id, -EIO, where); + return -EIO; +} + +static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where) +{ + trace_afs_file_error(vnode, -EIO, where); + return -EIO; +} /*****************************************************************************/ /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 99fd13500a97..2e51c6994148 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) goto error_no_page; } - ret = -EIO; - if (PageError(page)) + if (PageError(page)) { + ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); goto error; + } buf = kmap_atomic(page); memcpy(devname, buf, size); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 947ae3ab389b..e6ab824ab761 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -915,7 +915,7 @@ int afs_extract_data(struct afs_call *call, bool want_more) break; case AFS_CALL_COMPLETE: kdebug("prem complete %d", call->error); - return -EIO; + return afs_io_error(call, afs_io_error_extract); default: break; } diff --git a/fs/afs/server.c b/fs/afs/server.c index 6102ea9ee3fb..1a087eb8f2d7 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -542,7 +542,7 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) case -ETIME: break; default: - fc->ac.error = -EIO; + fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail); goto error; } } diff --git a/fs/afs/write.c b/fs/afs/write.c index 19c04caf3c01..fdb9d6024126 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -533,6 +533,7 @@ no_more: case -ENOENT: case -ENOMEDIUM: case -ENXIO: + trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail); afs_kill_pages(mapping, first, last); mapping_set_error(mapping, ret); break; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5e0f8dcede26..48b20a261d39 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -103,6 +103,24 @@ enum afs_eproto_cause { afs_eproto_yvl_vlendpt_type, }; +enum afs_io_error { + afs_io_error_cm_reply, + afs_io_error_extract, + afs_io_error_fs_probe_fail, + afs_io_error_vl_lookup_fail, +}; + +enum afs_file_error { + afs_file_error_dir_bad_magic, + afs_file_error_dir_big, + afs_file_error_dir_missing_page, + afs_file_error_dir_over_end, + afs_file_error_dir_small, + afs_file_error_dir_unmarked_ext, + afs_file_error_mntpt, + afs_file_error_writeback_fail, +}; + #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -183,6 +201,21 @@ enum afs_eproto_cause { EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \ E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType") +#define afs_io_errors \ + EM(afs_io_error_cm_reply, "CM_REPLY") \ + EM(afs_io_error_extract, "EXTRACT") \ + EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \ + E_(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") + +#define afs_file_errors \ + EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ + EM(afs_file_error_dir_big, "DIR_BIG") \ + EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \ + EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \ + EM(afs_file_error_dir_small, "DIR_SMALL") \ + EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \ + EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \ + E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED") /* * Export enum symbols via userspace. @@ -197,6 +230,9 @@ afs_fs_operations; afs_vl_operations; afs_edit_dir_ops; afs_edit_dir_reasons; +afs_eproto_causes; +afs_io_errors; +afs_file_errors; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -613,6 +649,51 @@ TRACE_EVENT(afs_protocol_error, __print_symbolic(__entry->cause, afs_eproto_causes)) ); +TRACE_EVENT(afs_io_error, + TP_PROTO(unsigned int call, int error, enum afs_io_error where), + + TP_ARGS(call, error, where), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(int, error ) + __field(enum afs_io_error, where ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("c=%08x r=%d %s", + __entry->call, __entry->error, + __print_symbolic(__entry->where, afs_io_errors)) + ); + +TRACE_EVENT(afs_file_error, + TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where), + + TP_ARGS(vnode, error, where), + + TP_STRUCT__entry( + __field_struct(struct afs_fid, fid ) + __field(int, error ) + __field(enum afs_file_error, where ) + ), + + TP_fast_assign( + __entry->fid = vnode->fid; + __entry->error = error; + __entry->where = where; + ), + + TP_printk("%x:%x:%x r=%d %s", + __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique, + __entry->error, + __print_symbolic(__entry->where, afs_file_errors)) + ); + TRACE_EVENT(afs_cm_no_server, TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx), From 2a0b4f64c9edcdcb67306f26d9d08ef982cb0ccd Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 12/26] afs: Don't invoke the server to read data beyond EOF When writing a new page, clear space in the page rather than attempting to load it from the server if the space is beyond the EOF. Signed-off-by: David Howells --- fs/afs/write.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index fdb9d6024126..11066a3248ba 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -33,10 +33,21 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; + size_t p; + void *data; int ret; _enter(",,%llu", (unsigned long long)pos); + if (pos >= vnode->vfs_inode.i_size) { + p = pos & ~PAGE_MASK; + ASSERTCMP(p + len, <=, PAGE_SIZE); + data = kmap(page); + memset(data + p, 0, len); + kunmap(page); + return 0; + } + req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), GFP_KERNEL); if (!req) From 3b6492df4153b8550d347dfc581856138678a231 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 13/26] afs: Increase to 64-bit volume ID and 96-bit vnode ID for YFS Increase the sizes of the volume ID to 64 bits and the vnode ID (inode number equivalent) to 96 bits to allow the support of YFS. This requires the iget comparator to check the vnode->fid rather than i_ino and i_generation as i_ino is not sufficiently capacious. It also requires this data to be placed into the vnode cache key for fscache. For the moment, just discard the top 32 bits of the vnode ID when returning it though stat. Signed-off-by: David Howells --- fs/afs/afs.h | 11 ++++++----- fs/afs/cache.c | 2 +- fs/afs/callback.c | 2 +- fs/afs/dir.c | 24 ++++++++++++------------ fs/afs/dynroot.c | 2 +- fs/afs/file.c | 8 ++++---- fs/afs/flock.c | 22 +++++++++++----------- fs/afs/fsclient.c | 24 ++++++++++++------------ fs/afs/inode.c | 31 +++++++++++++++++-------------- fs/afs/proc.c | 2 +- fs/afs/rotate.c | 2 +- fs/afs/security.c | 6 +++--- fs/afs/super.c | 5 +++-- fs/afs/volume.c | 2 +- fs/afs/write.c | 18 +++++++++--------- fs/afs/xattr.c | 2 +- include/trace/events/afs.h | 4 ++-- 17 files changed, 86 insertions(+), 81 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b4ff1f7ae4ab..c23b31b742fa 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -23,9 +23,9 @@ #define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */ #define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */ -typedef unsigned afs_volid_t; -typedef unsigned afs_vnodeid_t; -typedef unsigned long long afs_dataversion_t; +typedef u64 afs_volid_t; +typedef u64 afs_vnodeid_t; +typedef u64 afs_dataversion_t; typedef enum { AFSVL_RWVOL, /* read/write volume */ @@ -52,8 +52,9 @@ typedef enum { */ struct afs_fid { afs_volid_t vid; /* volume ID */ - afs_vnodeid_t vnode; /* file index within volume */ - unsigned unique; /* unique ID number (file index version) */ + afs_vnodeid_t vnode; /* Lower 64-bits of file index within volume */ + u32 vnode_hi; /* Upper 32-bits of file index */ + u32 unique; /* unique ID number (file index version) */ }; /* diff --git a/fs/afs/cache.c b/fs/afs/cache.c index b1c31ec4523a..f6d0a21e8052 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, struct afs_vnode *vnode = cookie_netfs_data; struct afs_vnode_cache_aux aux; - _enter("{%x,%x,%llx},%p,%u", + _enter("{%llx,%x,%llx},%p,%u", vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, buffer, buflen); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 5f261fbf2182..8698198ad427 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -310,7 +310,7 @@ void afs_break_callbacks(struct afs_server *server, size_t count, /* TODO: Sort the callback break list by volume ID */ for (; count > 0; callbacks++, count--) { - _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", + _debug("- Fid { vl=%08llx n=%llu u=%u } CB { v=%u x=%u t=%u }", callbacks->fid.vid, callbacks->fid.vnode, callbacks->fid.unique, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 78f9754fd03d..024b7cf7441c 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -552,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, } *fid = cookie.fid; - _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique); + _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique); return 0; } @@ -830,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, struct key *key; int ret; - _enter("{%x:%u},%p{%pd},", + _enter("{%llx:%llu},%p{%pd},", dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -900,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); - _enter("{v={%x:%u} n=%pd fl=%lx},", + _enter("{v={%llx:%llu} n=%pd fl=%lx},", vnode->fid.vid, vnode->fid.vnode, dentry, vnode->flags); } else { @@ -969,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) /* if the vnode ID has changed, then the dirent points to a * different file */ if (fid.vnode != vnode->fid.vnode) { - _debug("%pd: dirent changed [%u != %u]", + _debug("%pd: dirent changed [%llu != %llu]", dentry, fid.vnode, vnode->fid.vnode); goto not_found; @@ -1108,7 +1108,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) mode |= S_IFDIR; - _enter("{%x:%u},{%pd},%ho", + _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); key = afs_request_key(dvnode->volume->cell); @@ -1178,7 +1178,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd}", + _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); key = afs_request_key(dvnode->volume->cell); @@ -1270,7 +1270,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd}", + _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); if (dentry->d_name.len >= AFSNAMEMAX) @@ -1334,7 +1334,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, mode |= S_IFREG; - _enter("{%x:%u},{%pd},%ho,", + _enter("{%llx:%llu},{%pd},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); ret = -ENAMETOOLONG; @@ -1397,7 +1397,7 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode = AFS_FS_I(dir); data_version = dvnode->status.data_version; - _enter("{%x:%u},{%x:%u},{%pd}", + _enter("{%llx:%llu},{%llx:%llu},{%pd}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, dentry); @@ -1468,7 +1468,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, u64 data_version = dvnode->status.data_version; int ret; - _enter("{%x:%u},{%pd},%s", + _enter("{%llx:%llu},{%pd},%s", dvnode->fid.vid, dvnode->fid.vnode, dentry, content); @@ -1544,7 +1544,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, orig_data_version = orig_dvnode->status.data_version; new_data_version = new_dvnode->status.data_version; - _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}", + _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}", orig_dvnode->fid.vid, orig_dvnode->fid.vnode, vnode->fid.vid, vnode->fid.vnode, new_dvnode->fid.vid, new_dvnode->fid.vnode, @@ -1611,7 +1611,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) { struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); - _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 0efed0a63080..a9ba81ddf154 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) struct inode *inode; int ret = -ENOENT; - _enter("%p{%pd}, {%x:%u}", + _enter("%p{%pd}, {%llx:%llu}", dentry, dentry, vnode->fid.vid, vnode->fid.vnode); if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) diff --git a/fs/afs/file.c b/fs/afs/file.c index 7d4f26198573..d6bc3f5d784b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file) struct key *key; int ret; - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { @@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file) struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = file->private_data; - _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); if ((file->f_mode & FMODE_WRITE)) return vfs_fsync(file, 0); @@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x,,,", + _enter("%s{%llx:%llu.%u},%x,,,", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); unsigned long priv; - _enter("{{%x:%u}[%lu],%lx},%x", + _enter("{{%llx:%llu}[%lu],%lx},%x", vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, gfp_flags); diff --git a/fs/afs/flock.c b/fs/afs/flock.c index dc62d15a964b..0568fd986821 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = { */ void afs_lock_may_be_available(struct afs_vnode *vnode) { - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); } @@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x,%u", + _enter("%s{%llx:%llu.%u},%x,%u", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x", + _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) struct afs_fs_cursor fc; int ret; - _enter("%s{%x:%u.%u},%x", + _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work) struct key *key; int ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); spin_lock(&vnode->lock); @@ -192,7 +192,7 @@ again: ret = afs_release_lock(vnode, vnode->lock_key); if (ret < 0) printk(KERN_WARNING "AFS:" - " Failed to release lock on {%x:%x} error %d\n", + " Failed to release lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); spin_lock(&vnode->lock); @@ -229,7 +229,7 @@ again: key_put(key); if (ret < 0) - pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n", + pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n", vnode->fid.vid, vnode->fid.vnode, ret); spin_lock(&vnode->lock); @@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) struct key *key = afs_file_key(file); int ret; - _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); + _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); /* only whole-file locks are supported */ if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX) @@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl) struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); int ret; - _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); + _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type); /* Flush all pending writes before doing anything with locks. */ vfs_fsync(file, 0); @@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); - _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", + _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}", vnode->fid.vid, vnode->fid.vnode, cmd, fl->fl_type, fl->fl_flags, (long long) fl->fl_start, (long long) fl->fl_end); @@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl) { struct afs_vnode *vnode = AFS_FS_I(locks_inode(file)); - _enter("{%x:%u},%d,{t=%x,fl=%x}", + _enter("{%llx:%llu},%d,{t=%x,fl=%x}", vnode->fid.vid, vnode->fid.vnode, cmd, fl->fl_type, fl->fl_flags); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index fe2e9e39b388..5e3027f21390 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -90,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode, if (!(flags & AFS_VNODE_NOT_YET_SET)) { if (expected_version && *expected_version != status->data_version) { - _debug("vnode modified %llx on {%x:%u} [exp %llx]", + _debug("vnode modified %llx on {%llx:%llu} [exp %llx]", (unsigned long long) status->data_version, vnode->fid.vid, vnode->fid.vnode, (unsigned long long) *expected_version); @@ -164,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (type != status->type && vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { - pr_warning("Vnode %x:%x:%x changed type %u to %u\n", + pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, @@ -389,7 +389,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) if (ret < 0) return ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -426,7 +426,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode, @@ -1261,7 +1261,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc, struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, @@ -1318,7 +1318,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, loff_t size, pos, i_size; __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = (loff_t)to - (loff_t)offset; @@ -1440,7 +1440,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1487,7 +1487,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); @@ -1536,7 +1536,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) return afs_fs_setattr_size(fc, attr); - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, @@ -2034,7 +2034,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) if (ret < 0) return ret; - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -2079,7 +2079,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, struct afs_call *call; __be32 *bp; - _enter(",%x,{%x:%u},,", + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), fid->vid, fid->vnode); call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); @@ -2250,7 +2250,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, __be32 *bp; int i; - _enter(",%x,{%x:%u},%u", + _enter(",%x,{%llx:%llu},%u", key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus, diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ab4e7a15c205..4ba47efe9668 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) struct afs_fs_cursor fc; int ret; - _enter("%s,{%x:%u.%u,S=%lx}", + _enter("%s,{%llx:%llu.%u,S=%lx}", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, vnode->flags); @@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) int afs_iget5_test(struct inode *inode, void *opaque) { struct afs_iget_data *data = opaque; + struct afs_vnode *vnode = AFS_FS_I(inode); - return inode->i_ino == data->fid.vnode && - inode->i_generation == data->fid.unique; + return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0; } /* @@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque) struct afs_iget_data *data = opaque; struct afs_vnode *vnode = AFS_FS_I(inode); - inode->i_ino = data->fid.vnode; - inode->i_generation = data->fid.unique; vnode->fid = data->fid; vnode->volume = data->volume; + /* YFS supports 96-bit vnode IDs, but Linux only supports + * 64-bit inode numbers. + */ + inode->i_ino = data->fid.vnode; + inode->i_generation = data->fid.unique; return 0; } @@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }", + _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", inode, inode->i_ino, data.fid.vid, data.fid.vnode, data.fid.unique); @@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode) key.vnode_id = vnode->fid.vnode; key.unique = vnode->fid.unique; - key.vnode_id_ext[0] = 0; - key.vnode_id_ext[1] = 0; + key.vnode_id_ext[0] = vnode->fid.vnode >> 32; + key.vnode_id_ext[1] = vnode->fid.vnode_hi; aux.data_version = vnode->status.data_version; vnode->cache = fscache_acquire_cookie(vnode->volume->cache, @@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, struct inode *inode; int ret; - _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique); + _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique); as = sb->s_fs_info; data.volume = as->volume; @@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { vl=%x vn=%x, u=%x }", + _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }", inode, fid->vid, fid->vnode, fid->unique); vnode = AFS_FS_I(inode); @@ -352,7 +355,7 @@ bad_inode: */ void afs_zap_data(struct afs_vnode *vnode) { - _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); #ifdef CONFIG_AFS_FSCACHE fscache_invalidate(vnode->cache); @@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) bool valid = false; int ret; - _enter("{v={%x:%u} fl=%lx},%x", + _enter("{v={%llx:%llu} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); @@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode) vnode = AFS_FS_I(inode); - _enter("{%x:%u.%d}", + _enter("{%llx:%llu.%d}", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); @@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) struct key *key; int ret; - _enter("{%x:%u},{n=%pd},%x", + _enter("{%llx:%llu},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index fc36c41641ab..d887f822f4eb 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -221,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) return 0; } - seq_printf(m, "%3d %08x %s\n", + seq_printf(m, "%3d %08llx %s\n", atomic_read(&vol->usage), vol->vid, afs_vol_types[vol->type]); diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index d7cbc3c230ee..41405dde0113 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -118,7 +118,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) default: m = "busy"; break; } - pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m); + pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m); } /* diff --git a/fs/afs/security.c b/fs/afs/security.c index 81dfedb7879f..d1ae53fd3739 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, bool changed = false; int i, j; - _enter("{%x:%u},%x,%x", + _enter("{%llx:%llu},%x,%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access); rcu_read_lock(); @@ -289,7 +289,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, bool valid = false; int i, ret; - _enter("{%x:%u},%x", + _enter("{%llx:%llu},%x", vnode->fid.vid, vnode->fid.vnode, key_serial(key)); /* check the permits to see if we've got one yet */ @@ -349,7 +349,7 @@ int afs_permission(struct inode *inode, int mask) if (mask & MAY_NOT_BLOCK) return -ECHILD; - _enter("{{%x:%u},%lx},%x,", + _enter("{{%llx:%llu},%lx},%x,", vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask); key = afs_request_key(vnode->volume->cell); diff --git a/fs/afs/super.c b/fs/afs/super.c index 4d3e274207fb..dcd07fe99871 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb, inode = afs_iget_pseudo_dir(sb, true); sb->s_flags |= SB_RDONLY; } else { - sprintf(sb->s_id, "%u", as->volume->vid); + sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); fid.vid = as->volume->vid; fid.vnode = 1; + fid.vnode_hi = 0; fid.unique = 1; inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL); } @@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode); + _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 1cd263fa6028..f0020e35bf6f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -250,7 +250,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%u", volume->vid); + idsz = sprintf(idbuf, "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { diff --git a/fs/afs/write.c b/fs/afs/write.c index 11066a3248ba..72efcfcf9f95 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -92,7 +92,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = pos >> PAGE_SHIFT; int ret; - _enter("{%x:%u},{%lx},%u,%u", + _enter("{%llx:%llu},{%lx},%u,%u", vnode->fid.vid, vnode->fid.vnode, index, from, to); /* We want to store information about how much of a page is altered in @@ -192,7 +192,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, loff_t i_size, maybe_i_size; int ret; - _enter("{%x:%u},{%lx}", + _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); maybe_i_size = pos + copied; @@ -241,7 +241,7 @@ static void afs_kill_pages(struct address_space *mapping, struct pagevec pv; unsigned count, loop; - _enter("{%x:%u},%lx-%lx", + _enter("{%llx:%llu},%lx-%lx", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -283,7 +283,7 @@ static void afs_redirty_pages(struct writeback_control *wbc, struct pagevec pv; unsigned count, loop; - _enter("{%x:%u},%lx-%lx", + _enter("{%llx:%llu},%lx-%lx", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -325,7 +325,7 @@ static int afs_store_data(struct address_space *mapping, struct list_head *p; int ret = -ENOKEY, ret2; - _enter("%s{%x:%u.%u},%lx,%lx,%x,%x", + _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, @@ -687,7 +687,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) unsigned count, loop; pgoff_t first = call->first, last = call->last; - _enter("{%x:%u},{%lx-%lx}", + _enter("{%llx:%llu},{%lx-%lx}", vnode->fid.vid, vnode->fid.vnode, first, last); pagevec_init(&pv); @@ -726,7 +726,7 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) ssize_t result; size_t count = iov_iter_count(from); - _enter("{%x.%u},{%zu},", + _enter("{%llx:%llu},{%zu},", vnode->fid.vid, vnode->fid.vnode, count); if (IS_SWAPFILE(&vnode->vfs_inode)) { @@ -754,7 +754,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); - _enter("{%x:%u},{n=%pD},%d", + _enter("{%llx:%llu},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); @@ -772,7 +772,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) struct afs_vnode *vnode = AFS_FS_I(inode); unsigned long priv; - _enter("{{%x:%u}},{%lx}", + _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, vmf->page->index); sb_start_pagefault(inode->i_sb); diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index cfcc674e64a5..a2cdf25573e2 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -72,7 +72,7 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler, char text[8 + 1 + 8 + 1 + 8 + 1]; size_t len; - len = sprintf(text, "%x:%x:%x", + len = sprintf(text, "%llx:%llx:%x", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); if (size == 0) return len; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 48b20a261d39..8acd56e20a37 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -374,7 +374,7 @@ TRACE_EVENT(afs_make_fs_call, } ), - TP_printk("c=%08x %06x:%06x:%06x %s", + TP_printk("c=%08x %06llx:%06llx:%06x %s", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -688,7 +688,7 @@ TRACE_EVENT(afs_file_error, __entry->where = where; ), - TP_printk("%x:%x:%x r=%d %s", + TP_printk("%llx:%llx:%x r=%d %s", __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique, __entry->error, __print_symbolic(__entry->where, afs_file_errors)) From 0067191201de0d1870ff692368900da5207daf2c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:57 +0100 Subject: [PATCH 14/26] afs: Commit the status on a new file/dir/symlink Call the function to commit the status on a new file, dir or symlink so that the access rights for the caller's key are cached for that object. Without this, the next access to the file will cause a FetchStatus operation to be emitted to retrieve the access rights. Signed-off-by: David Howells --- fs/afs/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 024b7cf7441c..8936731c59ff 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1089,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc, vnode = AFS_FS_I(inode); set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + afs_vnode_commit_status(fc, vnode, 0); d_add(new_dentry, inode); } From 06aeb2971457b33c1123af9f307a55f3dc4052c9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 15/26] afs: Remove callback details from afs_callback_break struct Remove unnecessary details of a broken callback, such as version, expiry and type, from the afs_callback_break struct as they're not actually used and make the list take more memory. Signed-off-by: David Howells --- fs/afs/afs.h | 2 +- fs/afs/callback.c | 8 ++------ fs/afs/cmservice.c | 17 +++++------------ 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index c23b31b742fa..fb9bcb8758ea 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -75,7 +75,7 @@ struct afs_callback { struct afs_callback_break { struct afs_fid fid; /* File identifier */ - struct afs_callback cb; /* Callback details */ + //struct afs_callback cb; /* Callback details */ }; #define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */ diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 8698198ad427..df9bfee698ad 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -310,14 +310,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count, /* TODO: Sort the callback break list by volume ID */ for (; count > 0; callbacks++, count--) { - _debug("- Fid { vl=%08llx n=%llu u=%u } CB { v=%u x=%u t=%u }", + _debug("- Fid { vl=%08llx n=%llu u=%u }", callbacks->fid.vid, callbacks->fid.vnode, - callbacks->fid.unique, - callbacks->cb.version, - callbacks->cb.expiry, - callbacks->cb.type - ); + callbacks->fid.unique); afs_break_one_callback(server, &callbacks->fid); } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 186f621f8722..fc0010d800a0 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -218,7 +218,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) cb->fid.vid = ntohl(*bp++); cb->fid.vnode = ntohl(*bp++); cb->fid.unique = ntohl(*bp++); - cb->cb.type = AFSCM_CB_UNTYPED; } afs_extract_to_tmp(call); @@ -236,24 +235,18 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (call->count2 != call->count && call->count2 != 0) return afs_protocol_error(call, -EBADMSG, afs_eproto_cb_count); - afs_extract_to_buf(call, call->count2 * 3 * 4); + call->_iter = &call->iter; + iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4); call->unmarshall++; case 4: - _debug("extract CB array"); + _debug("extract discard %zu/%u", + iov_iter_count(&call->iter), call->count2 * 3 * 4); + ret = afs_extract_data(call, false); if (ret < 0) return ret; - _debug("unmarshall CB array"); - cb = call->request; - bp = call->buffer; - for (loop = call->count2; loop > 0; loop--, cb++) { - cb->cb.version = ntohl(*bp++); - cb->cb.expiry = ntohl(*bp++); - cb->cb.type = ntohl(*bp++); - } - call->unmarshall++; case 5: break; From 35dbfba3111a5ef0663bb89185ce8dfdbef63f8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 16/26] afs: Implement the YFS cache manager service Implement the YFS cache manager service which gives extra capabilities on top of AFS. This is done by listening for an additional service on the same port and indicating that anyone requesting an upgrade should be upgraded to the YFS port. Signed-off-by: David Howells --- fs/afs/cmservice.c | 103 +++++++++++++++++++++++++++++++++++++++++- fs/afs/protocol_yfs.h | 57 +++++++++++++++++++++++ fs/afs/rxrpc.c | 15 ++++++ 3 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 fs/afs/protocol_yfs.h diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index fc0010d800a0..8cf8d10daa6c 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -16,6 +16,7 @@ #include #include "internal.h" #include "afs_cm.h" +#include "protocol_yfs.h" static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *); static void SRXAFSCB_ProbeUuid(struct work_struct *); static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); +static int afs_deliver_yfs_cb_callback(struct afs_call *); + #define CM_NAME(name) \ const char afs_SRXCB##name##_name[] __tracepoint_string = \ "CB." #name @@ -100,13 +103,24 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { .work = SRXAFSCB_TellMeAboutYourself, }; +/* + * YFS CB.CallBack operation type + */ +static CM_NAME(YFS_CallBack); +static const struct afs_call_type afs_SRXYFSCB_CallBack = { + .name = afs_SRXCBYFS_CallBack_name, + .deliver = afs_deliver_yfs_cb_callback, + .destructor = afs_cm_destructor, + .work = SRXAFSCB_CallBack, +}; + /* * route an incoming cache manager call * - return T if supported, F if not */ bool afs_cm_incoming_call(struct afs_call *call) { - _enter("{CB.OP %u}", call->operation_ID); + _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); switch (call->operation_ID) { case CBCallBack: @@ -127,6 +141,11 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBTellMeAboutYourself: call->type = &afs_SRXCBTellMeAboutYourself; return true; + case YFSCBCallBack: + if (call->service_id != YFS_CM_SERVICE) + return false; + call->type = &afs_SRXYFSCB_CallBack; + return true; default: return false; } @@ -570,3 +589,85 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) return afs_queue_call_work(call); } + +/* + * deliver request data to a YFS CB.CallBack call + */ +static int afs_deliver_yfs_cb_callback(struct afs_call *call) +{ + struct afs_callback_break *cb; + struct sockaddr_rxrpc srx; + struct yfs_xdr_YFSFid *bp; + size_t size; + int ret, loop; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the FID array and its count in two steps */ + case 1: + _debug("extract FID count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("FID count: %u", call->count); + if (call->count > YFSCBMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_cb_fid_count); + + size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid)); + call->buffer = kmalloc(size, GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + afs_extract_to_buf(call, size); + call->unmarshall++; + + case 2: + _debug("extract FID array"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + _debug("unmarshall FID array"); + call->request = kcalloc(call->count, + sizeof(struct afs_callback_break), + GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + cb = call->request; + bp = call->buffer; + for (loop = call->count; loop > 0; loop--, cb++) { + cb->fid.vid = xdr_to_u64(bp->volume); + cb->fid.vnode = xdr_to_u64(bp->vnode.lo); + cb->fid.vnode_hi = ntohl(bp->vnode.hi); + cb->fid.unique = ntohl(bp->vnode.unique); + bp++; + } + + afs_extract_to_tmp(call); + call->unmarshall++; + + case 3: + break; + } + + if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) + return afs_io_error(call, afs_io_error_cm_reply); + + /* We'll need the file server record as that tells us which set of + * vnodes to operate upon. + */ + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) + trace_afs_cm_no_server(call, &srx); + + return afs_queue_call_work(call); +} diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h new file mode 100644 index 000000000000..b7b211dd9857 --- /dev/null +++ b/fs/afs/protocol_yfs.h @@ -0,0 +1,57 @@ +/* YFS protocol bits + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define YFS_FS_SERVICE 2500 +#define YFS_CM_SERVICE 2501 + +#define YFSCBMAX 1024 + +enum YFS_CM_Operations { + YFSCBProbe = 206, /* probe client */ + YFSCBGetLock = 207, /* get contents of CM lock table */ + YFSCBXStatsVersion = 209, /* get version of extended statistics */ + YFSCBGetXStats = 210, /* get contents of extended statistics data */ + YFSCBInitCallBackState3 = 213, /* initialise callback state, version 3 */ + YFSCBProbeUuid = 214, /* check the client hasn't rebooted */ + YFSCBGetServerPrefs = 215, + YFSCBGetCellServDV = 216, + YFSCBGetLocalCell = 217, + YFSCBGetCacheConfig = 218, + YFSCBGetCellByNum = 65537, + YFSCBTellMeAboutYourself = 65538, /* get client capabilities */ + YFSCBCallBack = 64204, +}; + +struct yfs_xdr_u64 { + __be32 msw; + __be32 lsw; +} __packed; + +static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x) +{ + return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw); +} + +static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x) +{ + return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) }; +} + +struct yfs_xdr_vnode { + struct yfs_xdr_u64 lo; + __be32 hi; + __be32 unique; +} __packed; + +struct yfs_xdr_YFSFid { + struct yfs_xdr_u64 volume; + struct yfs_xdr_vnode vnode; +} __packed; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index e6ab824ab761..ce98e133caa6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -16,6 +16,7 @@ #include #include "internal.h" #include "afs_cm.h" +#include "protocol_yfs.h" struct workqueue_struct *afs_async_calls; @@ -42,6 +43,7 @@ int afs_open_socket(struct afs_net *net) struct sockaddr_rxrpc srx; struct socket *socket; unsigned int min_level; + u16 service_upgrade[2]; int ret; _enter(""); @@ -75,6 +77,19 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) goto error_2; + srx.srx_service = YFS_CM_SERVICE; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); + if (ret < 0) + goto error_2; + + service_upgrade[0] = CM_SERVICE; + service_upgrade[1] = YFS_CM_SERVICE; + ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE, + (void *)service_upgrade, sizeof(service_upgrade)); + if (ret < 0) + goto error_2; + + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, afs_rx_discard_new_call); From 36bb5f490a542f230beb982475b56d79d72033de Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 17/26] afs: Fix FS.FetchStatus delivery from updating wrong vnode The FS.FetchStatus reply delivery function was updating inode of the directory in which a lookup had been done with the status of the looked up file. This corrupts some of the directory state. Fixes: 5cf9dd55a0ec ("afs: Prospectively look up extra files when doing a single lookup") Signed-off-by: David Howells --- fs/afs/fsclient.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 5e3027f21390..f758750e81d8 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -2026,7 +2026,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) struct afs_file_status *status = call->reply[1]; struct afs_callback *callback = call->reply[2]; struct afs_volsync *volsync = call->reply[3]; - struct afs_vnode *vnode = call->reply[0]; + struct afs_fid *fid = call->reply[0]; const __be32 *bp; int ret; @@ -2034,21 +2034,15 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) if (ret < 0) return ret; - _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + _enter("{%llx:%llu}", fid->vid, fid->vnode); /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = afs_decode_status(call, &bp, status, vnode, + ret = afs_decode_status(call, &bp, status, NULL, &call->expected_version, NULL); if (ret < 0) return ret; - callback[call->count].version = ntohl(bp[0]); - callback[call->count].expiry = ntohl(bp[1]); - callback[call->count].type = ntohl(bp[2]); - if (vnode) - xdr_decode_AFSCallBack(call, vnode, &bp); - else - bp += 3; + xdr_decode_AFSCallBack_raw(&bp, callback); if (volsync) xdr_decode_AFSVolSync(&bp, volsync); @@ -2089,7 +2083,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, } call->key = fc->key; - call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[0] = fid; call->reply[1] = status; call->reply[2] = callback; call->reply[3] = volsync; From 12d8e95a911eb3921a1fecf1e4d913654f6f4fb5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 18/26] afs: Calc callback expiry in op reply delivery Calculate the callback expiration time at the point of operation reply delivery, using the reply time queried from AF_RXRPC on that call as a base. Signed-off-by: David Howells --- fs/afs/afs.h | 2 +- fs/afs/fsclient.c | 22 +++++++++++++++++----- fs/afs/inode.c | 4 ++-- fs/afs/internal.h | 2 ++ fs/afs/rxrpc.c | 6 ++++++ 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index fb9bcb8758ea..417cd23529c5 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -68,8 +68,8 @@ typedef enum { } afs_callback_type_t; struct afs_callback { + time64_t expires_at; /* Time at which expires */ unsigned version; /* Callback version */ - unsigned expiry; /* Time at which expires */ afs_callback_type_t type; /* Type of callback */ }; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index f758750e81d8..6105cdb17163 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -287,13 +287,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, *_bp = bp; } -static void xdr_decode_AFSCallBack_raw(const __be32 **_bp, +static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry) +{ + return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC); +} + +static void xdr_decode_AFSCallBack_raw(struct afs_call *call, + const __be32 **_bp, struct afs_callback *cb) { const __be32 *bp = *_bp; cb->version = ntohl(*bp++); - cb->expiry = ntohl(*bp++); + cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++)); cb->type = ntohl(*bp++); *_bp = bp; } @@ -440,6 +446,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy call->reply[0] = vnode; call->reply[1] = volsync; call->expected_version = new_inode ? 1 : vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -627,6 +634,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[1] = NULL; /* volsync */ call->reply[2] = req; call->expected_version = vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -672,6 +680,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) call->reply[1] = NULL; /* volsync */ call->reply[2] = req; call->expected_version = vnode->status.data_version; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -714,7 +723,7 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); + xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ _leave(" = 0 [done]"); @@ -773,6 +782,7 @@ int afs_fs_create(struct afs_fs_cursor *fc, call->reply[2] = newstatus; call->reply[3] = newcb; call->expected_version = current_data_version + 1; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2042,7 +2052,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) &call->expected_version, NULL); if (ret < 0) return ret; - xdr_decode_AFSCallBack_raw(&bp, callback); + xdr_decode_AFSCallBack_raw(call, &bp, callback); if (volsync) xdr_decode_AFSVolSync(&bp, volsync); @@ -2088,6 +2098,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, call->reply[2] = callback; call->reply[3] = volsync; call->expected_version = 1; /* vnode->status.data_version */ + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2188,7 +2199,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; callbacks = call->reply[2]; callbacks[call->count].version = ntohl(bp[0]); - callbacks[call->count].expiry = ntohl(bp[1]); + callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1])); callbacks[call->count].type = ntohl(bp[2]); statuses = call->reply[1]; if (call->count == 0 && vnode && statuses[0].abort_code == 0) @@ -2261,6 +2272,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, call->reply[2] = callbacks; call->reply[3] = volsync; call->count2 = nr_fids; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4ba47efe9668..4c6d8e1112c2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -317,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, * didn't give us a callback) */ vnode->cb_version = 0; vnode->cb_type = 0; - vnode->cb_expires_at = 0; + vnode->cb_expires_at = ktime_get(); } else { vnode->cb_version = cb->version; vnode->cb_type = cb->type; - vnode->cb_expires_at = cb->expiry; + vnode->cb_expires_at = cb->expires_at; vnode->cb_interest = afs_get_cb_interest(cbi); set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 28d08aac515d..78065af05153 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -131,6 +131,7 @@ struct afs_call { bool async; /* T if asynchronous */ bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ + bool want_reply_time; /* T if want reply_time */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -144,6 +145,7 @@ struct afs_call { }; afs_dataversion_t expected_version; /* Updated version expected from store */ afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */ + ktime_t reply_time; /* Time of first reply packet */ }; struct afs_call_type { diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ce98e133caa6..444ba0d511ef 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -499,6 +499,12 @@ static void afs_deliver_to_call(struct afs_call *call) return; } + if (call->want_reply_time && + rxrpc_kernel_get_reply_time(call->net->socket, + call->rxcall, + &call->reply_time)) + call->want_reply_time = false; + ret = call->type->deliver(call); state = READ_ONCE(call->state); switch (ret) { From f58db83fd3325a305cf615f4ffb0e6f60745ed8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 19/26] afs: Get the target vnode in afs_rmdir() and get a callback on it Get the target vnode in afs_rmdir() and validate it before we attempt the deletion, The vnode pointer will be passed through to the delivery function in a later patch so that the delivery function can mark it deleted. Signed-off-by: David Howells --- fs/afs/dir.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 8936731c59ff..f2dd48d4363f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1174,7 +1174,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) static int afs_rmdir(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; - struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; u64 data_version = dvnode->status.data_version; int ret; @@ -1188,6 +1188,14 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) goto error; } + /* Try to make sure we have a callback promise on the victim. */ + if (d_really_is_positive(dentry)) { + vnode = AFS_FS_I(d_inode(dentry)); + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_key; + } + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { @@ -1206,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) } } +error_key: key_put(key); error: return ret; From d4936803a92b7d088086b1d7b8ecb5739d52c03b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 20/26] afs: Expand data structure fields to support YFS Expand fields in various data structures to support the expanded information that YFS is capable of returning. Signed-off-by: David Howells --- fs/afs/afs.h | 35 ++++++++++++++++++----------------- fs/afs/fsclient.c | 9 +++++---- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 417cd23529c5..d12ffb457e47 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -130,19 +130,18 @@ typedef u32 afs_access_t; struct afs_file_status { u64 size; /* file size */ afs_dataversion_t data_version; /* current data version */ - time_t mtime_client; /* last time client changed data */ - time_t mtime_server; /* last time server changed data */ - unsigned abort_code; /* Abort if bulk-fetching this failed */ - - afs_file_type_t type; /* file type */ - unsigned nlink; /* link count */ - u32 author; /* author ID */ - u32 owner; /* owner ID */ - u32 group; /* group ID */ + struct timespec64 mtime_client; /* Last time client changed data */ + struct timespec64 mtime_server; /* Last time server changed data */ + s64 author; /* author ID */ + s64 owner; /* owner ID */ + s64 group; /* group ID */ afs_access_t caller_access; /* access rights for authenticated caller */ afs_access_t anon_access; /* access rights for unauthenticated caller */ umode_t mode; /* UNIX mode */ + afs_file_type_t type; /* file type */ + u32 nlink; /* link count */ s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */ + u32 abort_code; /* Abort if bulk-fetching this failed */ }; /* @@ -159,25 +158,27 @@ struct afs_file_status { * AFS volume synchronisation information */ struct afs_volsync { - time_t creation; /* volume creation time */ + time64_t creation; /* volume creation time */ }; /* * AFS volume status record */ struct afs_volume_status { - u32 vid; /* volume ID */ - u32 parent_id; /* parent volume ID */ + afs_volid_t vid; /* volume ID */ + afs_volid_t parent_id; /* parent volume ID */ u8 online; /* true if volume currently online and available */ u8 in_service; /* true if volume currently in service */ u8 blessed; /* same as in_service */ u8 needs_salvage; /* true if consistency checking required */ u32 type; /* volume type (afs_voltype_t) */ - u32 min_quota; /* minimum space set aside (blocks) */ - u32 max_quota; /* maximum space this volume may occupy (blocks) */ - u32 blocks_in_use; /* space this volume currently occupies (blocks) */ - u32 part_blocks_avail; /* space available in volume's partition */ - u32 part_max_blocks; /* size of volume's partition */ + u64 min_quota; /* minimum space set aside (blocks) */ + u64 max_quota; /* maximum space this volume may occupy (blocks) */ + u64 blocks_in_use; /* space this volume currently occupies (blocks) */ + u64 part_blocks_avail; /* space available in volume's partition */ + u64 part_max_blocks; /* size of volume's partition */ + s64 vol_copy_date; + s64 vol_backup_date; }; #define AFS_BLOCK_SIZE 1024 diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6105cdb17163..2da65309e0de 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -69,8 +69,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode, struct timespec64 t; umode_t mode; - t.tv_sec = status->mtime_client; - t.tv_nsec = 0; + t = status->mtime_client; vnode->vfs_inode.i_ctime = t; vnode->vfs_inode.i_mtime = t; vnode->vfs_inode.i_atime = t; @@ -194,8 +193,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, EXTRACT_M(mode); EXTRACT_M(group); - status->mtime_client = ntohl(xdr->mtime_client); - status->mtime_server = ntohl(xdr->mtime_server); + status->mtime_client.tv_sec = ntohl(xdr->mtime_client); + status->mtime_client.tv_nsec = 0; + status->mtime_server.tv_sec = ntohl(xdr->mtime_server); + status->mtime_server.tv_nsec = 0; status->lock_count = ntohl(xdr->lock_count); size = (u64)ntohl(xdr->size_lo); From 30062bd13e3659a309d249a06d5f4ebb4a5c5251 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 21/26] afs: Implement YFS support in the fs client Implement support for talking to YFS-variant fileservers in the cache manager and the filesystem client. These implement upgraded services on the same port as their AFS services. YFS fileservers provide expanded capabilities over AFS. Signed-off-by: David Howells --- fs/afs/Makefile | 3 +- fs/afs/callback.c | 9 +- fs/afs/dir.c | 21 +- fs/afs/fsclient.c | 104 +- fs/afs/internal.h | 35 +- fs/afs/protocol_yfs.h | 106 ++ fs/afs/server.c | 8 + fs/afs/yfsclient.c | 2184 ++++++++++++++++++++++++++++++++++++ include/trace/events/afs.h | 58 +- 9 files changed, 2500 insertions(+), 28 deletions(-) create mode 100644 fs/afs/yfsclient.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 03e9f7afea1b..cc942b790cff 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -33,7 +33,8 @@ kafs-y := \ vl_list.o \ volume.o \ write.o \ - xattr.o + xattr.o \ + yfsclient.o kafs-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/callback.c b/fs/afs/callback.c index df9bfee698ad..1c7955f5cdaf 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server) /* * actually break a callback */ -void afs_break_callback(struct afs_vnode *vnode) +void __afs_break_callback(struct afs_vnode *vnode) { _enter(""); - write_seqlock(&vnode->cb_lock); - clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; @@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode) afs_lock_may_be_available(vnode); spin_unlock(&vnode->lock); } +} +void afs_break_callback(struct afs_vnode *vnode) +{ + write_seqlock(&vnode->cb_lock); + __afs_break_callback(vnode); write_sequnlock(&vnode->cb_lock); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index f2dd48d4363f..43dea3b00c29 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1200,7 +1200,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, dentry->d_name.name, true, + afs_fs_remove(&fc, vnode, dentry->d_name.name, true, data_version); } @@ -1245,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key, if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (dir_valid) { + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + /* Already done */ + } else if (dir_valid) { drop_nlink(&vnode->vfs_inode); if (vnode->vfs_inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); @@ -1274,7 +1276,7 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key, static int afs_unlink(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; - struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; + struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct key *key; unsigned long d_version = (unsigned long)dentry->d_fsdata; u64 data_version = dvnode->status.data_version; @@ -1304,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, dentry->d_name.name, false, + + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && + !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { + yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, + data_version); + if (fc.ac.error != -ECONNABORTED || + fc.ac.abort_code != RXGEN_OPCODE) + continue; + set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); + } + + afs_fs_remove(&fc, vnode, dentry->d_name.name, false, data_version); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 2da65309e0de..3975969719de 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -17,6 +17,7 @@ #include "internal.h" #include "afs_fs.h" #include "xdr_fs.h" +#include "protocol_yfs.h" static const struct afs_fid afs_zero_fid; @@ -312,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp, struct afs_volsync *volsync) { const __be32 *bp = *_bp; + u32 creation; - volsync->creation = ntohl(*bp++); + creation = ntohl(*bp++); bp++; /* spare2 */ bp++; /* spare3 */ bp++; /* spare4 */ bp++; /* spare5 */ bp++; /* spare6 */ *_bp = bp; + + if (volsync) + volsync->creation = creation; } /* @@ -380,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, vs->blocks_in_use = ntohl(*bp++); vs->part_blocks_avail = ntohl(*bp++); vs->part_max_blocks = ntohl(*bp++); + vs->vol_copy_date = 0; + vs->vol_backup_date = 0; *_bp = bp; } @@ -405,8 +412,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) if (ret < 0) return ret; xdr_decode_AFSCallBack(call, vnode, &bp); - if (call->reply[1]) - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSVolSync(&bp, call->reply[1]); _leave(" = 0 [done]"); return 0; @@ -433,6 +439,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_file_status(fc, volsync, new_inode); + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -567,8 +576,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (ret < 0) return ret; xdr_decode_AFSCallBack(call, vnode, &bp); - if (call->reply[1]) - xdr_decode_AFSVolSync(&bp, call->reply[1]); + xdr_decode_AFSVolSync(&bp, call->reply[1]); call->unmarshall++; @@ -665,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_data(fc, req); + if (upper_32_bits(req->pos) || upper_32_bits(req->len) || upper_32_bits(req->pos + req->len)) @@ -765,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc, size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){ + if (S_ISDIR(mode)) + return yfs_fs_make_dir(fc, name, mode, current_data_version, + newfid, newstatus, newcb); + else + return yfs_fs_create_file(fc, name, mode, current_data_version, + newfid, newstatus, newcb); + } + _enter(""); namesz = strlen(name); @@ -857,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = { /* * remove a file or directory */ -int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, - u64 current_data_version) +int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, bool isdir, u64 current_data_version) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *dvnode = fc->vnode; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_remove(fc, vnode, name, isdir, current_data_version); + _enter(""); namesz = strlen(name); @@ -879,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, return -ENOMEM; call->key = fc->key; - call->reply[0] = vnode; + call->reply[0] = dvnode; + call->reply[1] = vnode; call->expected_version = current_data_version + 1; /* marshall the parameters */ bp = call->request; *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(dvnode->fid.vid); + *bp++ = htonl(dvnode->fid.vnode); + *bp++ = htonl(dvnode->fid.unique); *bp++ = htonl(namesz); memcpy(bp, name, namesz); bp = (void *) bp + namesz; @@ -897,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir, } afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); + trace_afs_make_fs_call(call, &dvnode->fid); return afs_make_call(&fc->ac, call, GFP_NOFS, false); } @@ -953,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, size_t namesz, reqsz, padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_link(fc, vnode, name, current_data_version); + _enter(""); namesz = strlen(name); @@ -1047,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_symlink(fc, name, contents, current_data_version, + newfid, newstatus); + _enter(""); namesz = strlen(name); @@ -1159,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc, size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_rename(fc, orig_name, + new_dvnode, new_name, + current_orig_data_version, + current_new_data_version); + _enter(""); o_namesz = strlen(orig_name); @@ -1329,6 +1366,9 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, loff_t size, pos, i_size; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_store_data(fc, mapping, first, last, offset, to); + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); @@ -1544,6 +1584,9 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_setattr(fc, attr); + if (attr->ia_valid & ATTR_SIZE) return afs_fs_setattr_size(fc, attr); @@ -1728,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, __be32 *bp; void *tmpbuf; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_get_volume_status(fc, vs); + _enter(""); tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); @@ -1817,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_set_lock(fc, type); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4); @@ -1849,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_extend_lock(fc); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); @@ -1880,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc) struct afs_net *net = afs_v2net(vnode); __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_release_lock(fc); + _enter(""); call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); @@ -1951,6 +2006,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, */ static int afs_deliver_fs_get_capabilities(struct afs_call *call) { + struct afs_server *server = call->reply[0]; u32 count; int ret; @@ -1986,6 +2042,11 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) break; } + if (call->service_id == YFS_FS_SERVICE) + set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + else + clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + _leave(" = 0 [done]"); return 0; } @@ -2019,6 +2080,8 @@ int afs_fs_get_capabilities(struct afs_net *net, return -ENOMEM; call->key = key; + call->reply[0] = server; + call->upgrade = true; /* marshall the parameters */ bp = call->request; @@ -2054,8 +2117,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) if (ret < 0) return ret; xdr_decode_AFSCallBack_raw(call, &bp, callback); - if (volsync) - xdr_decode_AFSVolSync(&bp, volsync); + xdr_decode_AFSVolSync(&bp, volsync); _leave(" = 0 [done]"); return 0; @@ -2084,6 +2146,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, struct afs_call *call; __be32 *bp; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync); + _enter(",%x,{%llx:%llu},,", key_serial(fc->key), fid->vid, fid->vnode); @@ -2218,8 +2283,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - if (call->reply[3]) - xdr_decode_AFSVolSync(&bp, call->reply[3]); + xdr_decode_AFSVolSync(&bp, call->reply[3]); call->unmarshall++; @@ -2256,6 +2320,10 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, __be32 *bp; int i; + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) + return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks, + nr_fids, volsync); + _enter(",%x,{%llx:%llu},%u", key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 78065af05153..ce79bd514331 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -459,6 +459,8 @@ struct afs_server { #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ #define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ #define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ +#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ +#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ atomic_t usage; u32 addr_version; /* Address list version */ @@ -751,6 +753,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; * callback.c */ extern void afs_init_callback_state(struct afs_server *); +extern void __afs_break_callback(struct afs_vnode *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); @@ -864,7 +867,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64, struct afs_fid *, struct afs_file_status *, struct afs_callback *); -extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64); +extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, struct afs_fid *, struct afs_file_status *); @@ -1228,6 +1231,36 @@ extern int afs_launder_page(struct page *); extern const struct xattr_handler *afs_xattr_handlers[]; extern ssize_t afs_listxattr(struct dentry *, char *, size_t); +/* + * yfsclient.c + */ +extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool); +extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *); +extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64, + struct afs_fid *, struct afs_file_status *, struct afs_callback *); +extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64); +extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64); +extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64, + struct afs_fid *, struct afs_file_status *); +extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, + struct afs_vnode *, const char *, u64, u64); +extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *, + pgoff_t, pgoff_t, unsigned, unsigned); +extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *); +extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); +extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t); +extern int yfs_fs_extend_lock(struct afs_fs_cursor *); +extern int yfs_fs_release_lock(struct afs_fs_cursor *); +extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, struct afs_volsync *); +extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, + struct afs_fid *, struct afs_file_status *, + struct afs_callback *, unsigned int, + struct afs_volsync *); /* * Miscellaneous inline functions. diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index b7b211dd9857..07bc10f076aa 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -30,6 +30,36 @@ enum YFS_CM_Operations { YFSCBCallBack = 64204, }; +enum YFS_FS_Operations { + YFSFETCHACL = 64131, /* YFS Fetch file ACL */ + YFSFETCHSTATUS = 64132, /* YFS Fetch file status */ + YFSSTOREACL = 64134, /* YFS Store file ACL */ + YFSSTORESTATUS = 64135, /* YFS Store file status */ + YFSREMOVEFILE = 64136, /* YFS Remove a file */ + YFSCREATEFILE = 64137, /* YFS Create a file */ + YFSRENAME = 64138, /* YFS Rename or move a file or directory */ + YFSSYMLINK = 64139, /* YFS Create a symbolic link */ + YFSLINK = 64140, /* YFS Create a hard link */ + YFSMAKEDIR = 64141, /* YFS Create a directory */ + YFSREMOVEDIR = 64142, /* YFS Remove a directory */ + YFSGETVOLUMESTATUS = 64149, /* YFS Get volume status information */ + YFSSETVOLUMESTATUS = 64150, /* YFS Set volume status information */ + YFSSETLOCK = 64156, /* YFS Request a file lock */ + YFSEXTENDLOCK = 64157, /* YFS Extend a file lock */ + YFSRELEASELOCK = 64158, /* YFS Release a file lock */ + YFSLOOKUP = 64161, /* YFS lookup file in directory */ + YFSFLUSHCPS = 64165, + YFSFETCHOPAQUEACL = 64168, + YFSWHOAMI = 64170, + YFSREMOVEACL = 64171, + YFSREMOVEFILE2 = 64173, + YFSSTOREOPAQUEACL2 = 64174, + YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */ + YFSFETCHDATA64 = 64537, /* YFS Fetch file data */ + YFSSTOREDATA64 = 64538, /* YFS Store file data */ + YFSUPDATESYMLINK = 64540, +}; + struct yfs_xdr_u64 { __be32 msw; __be32 lsw; @@ -55,3 +85,79 @@ struct yfs_xdr_YFSFid { struct yfs_xdr_u64 volume; struct yfs_xdr_vnode vnode; } __packed; + + +struct yfs_xdr_YFSFetchStatus { + __be32 type; + __be32 nlink; + struct yfs_xdr_u64 size; + struct yfs_xdr_u64 data_version; + struct yfs_xdr_u64 author; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; + __be32 mode; + __be32 caller_access; + __be32 anon_access; + struct yfs_xdr_vnode parent; + __be32 data_access_protocol; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 mtime_server; + __be32 lock_count; + __be32 abort_code; +} __packed; + +struct yfs_xdr_YFSCallBack { + __be32 version; + struct yfs_xdr_u64 expiration_time; + __be32 type; +} __packed; + +struct yfs_xdr_YFSStoreStatus { + __be32 mask; + __be32 mode; + struct yfs_xdr_u64 mtime_client; + struct yfs_xdr_u64 owner; + struct yfs_xdr_u64 group; +} __packed; + +struct yfs_xdr_RPCFlags { + __be32 rpc_flags; +} __packed; + +struct yfs_xdr_YFSVolSync { + struct yfs_xdr_u64 vol_creation_date; + struct yfs_xdr_u64 vol_update_date; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 blocks_avail; +} __packed; + +enum yfs_volume_type { + yfs_volume_type_ro = 0, + yfs_volume_type_rw = 1, +}; + +#define yfs_FVSOnline 0x1 +#define yfs_FVSInservice 0x2 +#define yfs_FVSBlessed 0x4 +#define yfs_FVSNeedsSalvage 0x8 + +struct yfs_xdr_YFSFetchVolumeStatus { + struct yfs_xdr_u64 vid; + struct yfs_xdr_u64 parent_id; + __be32 flags; + __be32 type; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 blocks_in_use; + struct yfs_xdr_u64 part_blocks_avail; + struct yfs_xdr_u64 part_max_blocks; + struct yfs_xdr_u64 vol_copy_date; + struct yfs_xdr_u64 vol_backup_date; +} __packed; + +struct yfs_xdr_YFSStoreVolumeStatus { + __be32 mask; + struct yfs_xdr_u64 min_quota; + struct yfs_xdr_u64 max_quota; + struct yfs_xdr_u64 file_quota; +} __packed; diff --git a/fs/afs/server.c b/fs/afs/server.c index 1a087eb8f2d7..aa35cfae5440 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -13,6 +13,7 @@ #include #include "afs_fs.h" #include "internal.h" +#include "protocol_yfs.h" static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ @@ -513,6 +514,8 @@ void afs_purge_servers(struct afs_net *net) */ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) { + int i; + _enter(""); fc->ac.addr = NULL; @@ -526,6 +529,11 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) &fc->ac, fc->key); switch (fc->ac.error) { case 0: + if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) { + for (i = 0; i < fc->ac.alist->nr_addrs; i++) + fc->ac.alist->addrs[i].srx_service = + YFS_FS_SERVICE; + } afs_end_cursor(&fc->ac); set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); return true; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c new file mode 100644 index 000000000000..d5e3f0095040 --- /dev/null +++ b/fs/afs/yfsclient.c @@ -0,0 +1,2184 @@ +/* YFS File Server client stubs + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "internal.h" +#include "afs_fs.h" +#include "xdr_fs.h" +#include "protocol_yfs.h" + +static const struct afs_fid afs_zero_fid; + +static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) +{ + call->cbi = afs_get_cb_interest(cbi); +} + +#define xdr_size(x) (sizeof(*x) / sizeof(__be32)) + +static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid) +{ + const struct yfs_xdr_YFSFid *x = (const void *)*_bp; + + fid->vid = xdr_to_u64(x->volume); + fid->vnode = xdr_to_u64(x->vnode.lo); + fid->vnode_hi = ntohl(x->vnode.hi); + fid->unique = ntohl(x->vnode.unique); + *_bp += xdr_size(x); +} + +static __be32 *xdr_encode_u32(__be32 *bp, u32 n) +{ + *bp++ = htonl(n); + return bp; +} + +static __be32 *xdr_encode_u64(__be32 *bp, u64 n) +{ + struct yfs_xdr_u64 *x = (void *)bp; + + *x = u64_to_xdr(n); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid) +{ + struct yfs_xdr_YFSFid *x = (void *)bp; + + x->volume = u64_to_xdr(fid->vid); + x->vnode.lo = u64_to_xdr(fid->vnode); + x->vnode.hi = htonl(fid->vnode_hi); + x->vnode.unique = htonl(fid->unique); + return bp + xdr_size(x); +} + +static size_t xdr_strlen(unsigned int len) +{ + return sizeof(__be32) + round_up(len, sizeof(__be32)); +} + +static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len) +{ + bp = xdr_encode_u32(bp, len); + bp = memcpy(bp, p, len); + if (len & 3) { + unsigned int pad = 4 - (len & 3); + + memset((u8 *)bp + len, 0, pad); + len += pad; + } + + return bp + len / sizeof(__be32); +} + +static s64 linux_to_yfs_time(const struct timespec64 *t) +{ + /* Convert to 100ns intervals. */ + return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; +} + +static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + + x->mask = htonl(AFS_SET_MODE); + x->mode = htonl(mode & S_IALLUGO); + x->mtime_client = u64_to_xdr(0); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = linux_to_yfs_time(t); + + x->mask = htonl(AFS_SET_MTIME); + x->mode = htonl(0); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(0); + x->group = u64_to_xdr(0); + return bp + xdr_size(x); +} + +/* + * Convert a signed 100ns-resolution 64-bit time into a timespec. + */ +static struct timespec64 yfs_time_to_linux(s64 t) +{ + struct timespec64 ts; + u64 abs_t; + + /* + * Unfortunately can not use normal 64 bit division on 32 bit arch, but + * the alternative, do_div, does not work with negative numbers so have + * to special case them + */ + if (t < 0) { + abs_t = -t; + ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100); + ts.tv_nsec = -ts.tv_nsec; + ts.tv_sec = -abs_t; + } else { + abs_t = t; + ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100; + ts.tv_sec = abs_t; + } + + return ts; +} + +static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr) +{ + s64 t = xdr_to_u64(xdr); + + return yfs_time_to_linux(t); +} + +static void yfs_check_req(struct afs_call *call, __be32 *bp) +{ + size_t len = (void *)bp - call->request; + + if (len > call->request_size) + pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n", + call->type->name, len, call->request_size); + else if (len < call->request_size) + pr_warning("kAFS: %s: Request buffer underflow (%zu<%u)\n", + call->type->name, len, call->request_size); +} + +/* + * Dump a bad file status record. + */ +static void xdr_dump_bad(const __be32 *bp) +{ + __be32 x[4]; + int i; + + pr_notice("YFS XDR: Bad status record\n"); + for (i = 0; i < 5 * 4 * 4; i += 16) { + memcpy(x, bp, 16); + bp += 4; + pr_notice("%03x: %08x %08x %08x %08x\n", + i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); + } + + memcpy(x, bp, 4); + pr_notice("0x50: %08x\n", ntohl(x[0])); +} + +/* + * Decode a YFSFetchStatus block + */ +static int xdr_decode_YFSFetchStatus(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; + u32 type; + u8 flags = 0; + + status->abort_code = ntohl(xdr->abort_code); + if (status->abort_code != 0) { + if (vnode && status->abort_code == VNOVNODE) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + status->nlink = 0; + __afs_break_callback(vnode); + } + return 0; + } + + type = ntohl(xdr->type); + switch (type) { + case AFS_FTYPE_FILE: + case AFS_FTYPE_DIR: + case AFS_FTYPE_SYMLINK: + if (type != status->type && + vnode && + !test_bit(AFS_VNODE_UNSET, &vnode->flags)) { + pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n", + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + status->type, type); + goto bad; + } + status->type = type; + break; + default: + goto bad; + } + +#define EXTRACT_M4(FIELD) \ + do { \ + u32 x = ntohl(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + +#define EXTRACT_M8(FIELD) \ + do { \ + u64 x = xdr_to_u64(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_META_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + +#define EXTRACT_D8(FIELD) \ + do { \ + u64 x = xdr_to_u64(xdr->FIELD); \ + if (status->FIELD != x) { \ + flags |= AFS_VNODE_DATA_CHANGED; \ + status->FIELD = x; \ + } \ + } while (0) + + EXTRACT_M4(nlink); + EXTRACT_D8(size); + EXTRACT_D8(data_version); + EXTRACT_M8(author); + EXTRACT_M8(owner); + EXTRACT_M8(group); + EXTRACT_M4(mode); + EXTRACT_M4(caller_access); /* call ticket dependent */ + EXTRACT_M4(anon_access); + + status->mtime_client = xdr_to_time(xdr->mtime_client); + status->mtime_server = xdr_to_time(xdr->mtime_server); + status->lock_count = ntohl(xdr->lock_count); + + if (read_req) { + read_req->data_version = status->data_version; + read_req->file_size = status->size; + } + + *_bp += xdr_size(xdr); + + if (vnode) { + if (test_bit(AFS_VNODE_UNSET, &vnode->flags)) + flags |= AFS_VNODE_NOT_YET_SET; + afs_update_inode_from_status(vnode, status, expected_version, + flags); + } + + return 0; + +bad: + xdr_dump_bad(*_bp); + return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); +} + +/* + * Decode the file status. We need to lock the target vnode if we're going to + * update its status so that stat() sees the attributes update atomically. + */ +static int yfs_decode_status(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + int ret; + + if (!vnode) + return xdr_decode_YFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + + write_seqlock(&vnode->cb_lock); + ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + write_sequnlock(&vnode->cb_lock); + return ret; +} + +/* + * Decode a YFSCallBack block + */ +static void xdr_decode_YFSCallBack(struct afs_call *call, + struct afs_vnode *vnode, + const __be32 **_bp) +{ + struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp; + struct afs_cb_interest *old, *cbi = call->cbi; + u64 cb_expiry; + + write_seqlock(&vnode->cb_lock); + + if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { + cb_expiry = xdr_to_u64(xdr->expiration_time); + do_div(cb_expiry, 10 * 1000 * 1000); + vnode->cb_version = ntohl(xdr->version); + vnode->cb_type = ntohl(xdr->type); + vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds(); + old = vnode->cb_interest; + if (old != call->cbi) { + vnode->cb_interest = cbi; + cbi = old; + } + set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } + + write_sequnlock(&vnode->cb_lock); + call->cbi = cbi; + *_bp += xdr_size(xdr); +} + +static void xdr_decode_YFSCallBack_raw(const __be32 **_bp, + struct afs_callback *cb) +{ + struct yfs_xdr_YFSCallBack *x = (void *)*_bp; + u64 cb_expiry; + + cb_expiry = xdr_to_u64(x->expiration_time); + do_div(cb_expiry, 10 * 1000 * 1000); + cb->version = ntohl(x->version); + cb->type = ntohl(x->type); + cb->expires_at = cb_expiry + ktime_get_real_seconds(); + + *_bp += xdr_size(x); +} + +/* + * Decode a YFSVolSync block + */ +static void xdr_decode_YFSVolSync(const __be32 **_bp, + struct afs_volsync *volsync) +{ + struct yfs_xdr_YFSVolSync *x = (void *)*_bp; + u64 creation; + + if (volsync) { + creation = xdr_to_u64(x->vol_creation_date); + do_div(creation, 10 * 1000 * 1000); + volsync->creation = creation; + } + + *_bp += xdr_size(x); +} + +/* + * Encode the requested attributes into a YFSStoreStatus block + */ +static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr) +{ + struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + s64 mtime = 0, owner = 0, group = 0; + u32 mask = 0, mode = 0; + + mask = 0; + if (attr->ia_valid & ATTR_MTIME) { + mask |= AFS_SET_MTIME; + mtime = linux_to_yfs_time(&attr->ia_mtime); + } + + if (attr->ia_valid & ATTR_UID) { + mask |= AFS_SET_OWNER; + owner = from_kuid(&init_user_ns, attr->ia_uid); + } + + if (attr->ia_valid & ATTR_GID) { + mask |= AFS_SET_GROUP; + group = from_kgid(&init_user_ns, attr->ia_gid); + } + + if (attr->ia_valid & ATTR_MODE) { + mask |= AFS_SET_MODE; + mode = attr->ia_mode & S_IALLUGO; + } + + x->mask = htonl(mask); + x->mode = htonl(mode); + x->mtime_client = u64_to_xdr(mtime); + x->owner = u64_to_xdr(owner); + x->group = u64_to_xdr(group); + return bp + xdr_size(x); +} + +/* + * Decode a YFSFetchVolumeStatus block. + */ +static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, + struct afs_volume_status *vs) +{ + const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp; + u32 flags; + + vs->vid = xdr_to_u64(x->vid); + vs->parent_id = xdr_to_u64(x->parent_id); + flags = ntohl(x->flags); + vs->online = flags & yfs_FVSOnline; + vs->in_service = flags & yfs_FVSInservice; + vs->blessed = flags & yfs_FVSBlessed; + vs->needs_salvage = flags & yfs_FVSNeedsSalvage; + vs->type = ntohl(x->type); + vs->min_quota = 0; + vs->max_quota = xdr_to_u64(x->max_quota); + vs->blocks_in_use = xdr_to_u64(x->blocks_in_use); + vs->part_blocks_avail = xdr_to_u64(x->part_blocks_avail); + vs->part_max_blocks = xdr_to_u64(x->part_max_blocks); + vs->vol_copy_date = xdr_to_u64(x->vol_copy_date); + vs->vol_backup_date = xdr_to_u64(x->vol_backup_date); + *_bp += sizeof(*x) / sizeof(__be32); +} + +/* + * deliver reply data to an FS.FetchStatus + */ +static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack(call, vnode, &bp); + xdr_decode_YFSVolSync(&bp, call->reply[1]); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchStatus operation type + */ +static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = { + .name = "YFS.FetchStatus(vnode)", + .op = yfs_FS_FetchStatus, + .deliver = yfs_deliver_fs_fetch_status_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a file. + */ +int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync, + bool new_inode) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = volsync; + call->expected_version = new_inode ? 1 : vnode->status.data_version; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.FetchData64. + */ +static int yfs_deliver_fs_fetch_data64(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + struct afs_read *req = call->reply[2]; + const __be32 *bp; + unsigned int size; + int ret; + + _enter("{%u,%zu/%llu}", + call->unmarshall, iov_iter_count(&call->iter), req->actual_len); + + switch (call->unmarshall) { + case 0: + req->actual_len = 0; + req->index = 0; + req->offset = req->pos & (PAGE_SIZE - 1); + afs_extract_to_tmp64(call); + call->unmarshall++; + + /* extract the returned data length */ + case 1: + _debug("extract data length"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + req->actual_len = be64_to_cpu(call->tmp64); + _debug("DATA length: %llu", req->actual_len); + req->remain = min(req->len, req->actual_len); + if (req->remain == 0) + goto no_more_data; + + call->unmarshall++; + + begin_page: + ASSERTCMP(req->index, <, req->nr_pages); + if (req->remain > PAGE_SIZE - req->offset) + size = PAGE_SIZE - req->offset; + else + size = req->remain; + call->bvec[0].bv_len = size; + call->bvec[0].bv_offset = req->offset; + call->bvec[0].bv_page = req->pages[req->index]; + iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); + ASSERTCMP(size, <=, PAGE_SIZE); + + /* extract the returned data */ + case 2: + _debug("extract data %zu/%llu", + iov_iter_count(&call->iter), req->remain); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + req->remain -= call->bvec[0].bv_len; + req->offset += call->bvec[0].bv_len; + ASSERTCMP(req->offset, <=, PAGE_SIZE); + if (req->offset == PAGE_SIZE) { + req->offset = 0; + if (req->page_done) + req->page_done(call, req); + req->index++; + if (req->remain > 0) + goto begin_page; + } + + ASSERTCMP(req->remain, ==, 0); + if (req->actual_len <= req->len) + goto no_more_data; + + /* Discard any excess data the server gave us */ + iov_iter_discard(&call->iter, READ, req->actual_len - req->len); + call->unmarshall = 3; + case 3: + _debug("extract discard %zu/%llu", + iov_iter_count(&call->iter), req->actual_len - req->len); + + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + no_more_data: + call->unmarshall = 4; + afs_extract_to_buf(call, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + /* extract the metadata */ + case 4: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack(call, vnode, &bp); + xdr_decode_YFSVolSync(&bp, call->reply[1]); + + call->unmarshall++; + + case 5: + break; + } + + for (; req->index < req->nr_pages; req->index++) { + if (req->offset < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + req->offset, PAGE_SIZE); + if (req->page_done) + req->page_done(call, req); + req->offset = 0; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void yfs_fetch_data_destructor(struct afs_call *call) +{ + struct afs_read *req = call->reply[2]; + + afs_put_read(req); + afs_flat_call_destructor(call); +} + +/* + * YFS.FetchData64 operation type + */ +static const struct afs_call_type yfs_RXYFSFetchData64 = { + .name = "YFS.FetchData64", + .op = yfs_FS_FetchData64, + .deliver = yfs_deliver_fs_fetch_data64, + .destructor = yfs_fetch_data_destructor, +}; + +/* + * Fetch data from a file. + */ +int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},%llx,%llx", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode, + req->pos, req->len); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_u64) * 2, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = NULL; /* volsync */ + call->reply[2] = req; + call->expected_version = vnode->status.data_version; + call->want_reply_time = true; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_u64(bp, req->pos); + bp = xdr_encode_u64(bp, req->len); + yfs_check_req(call, bp); + + refcount_inc(&req->usage); + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data for YFS.CreateFile or YFS.MakeDir. + */ +static int yfs_deliver_fs_create_vnode(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, call->reply[1]); + ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack_raw(&bp, call->reply[3]); + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.CreateFile and FS.MakeDir operation type + */ +static const struct afs_call_type afs_RXFSCreateFile = { + .name = "YFS.CreateFile", + .op = yfs_FS_CreateFile, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a file. + */ +int yfs_fs_create_file(struct afs_fs_cursor *fc, + const char *name, + umode_t mode, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz, reqsz, rplsz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + reqsz = (sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(__be32)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSCREATEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + bp = xdr_encode_u32(bp, 0); /* ViceLockType */ + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +static const struct afs_call_type yfs_RXFSMakeDir = { + .name = "YFS.MakeDir", + .op = yfs_FS_MakeDir, + .deliver = yfs_deliver_fs_create_vnode, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a directory. + */ +int yfs_fs_make_dir(struct afs_fs_cursor *fc, + const char *name, + umode_t mode, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus, + struct afs_callback *newcb) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz, reqsz, rplsz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + reqsz = (sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSStoreStatus)); + rplsz = (sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + + call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->reply[3] = newcb; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSMAKEDIR); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.RemoveFile2 operation. + */ +static int yfs_deliver_fs_remove_file2(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0]; + struct afs_vnode *vnode = call->reply[1]; + struct afs_fid fid; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + + xdr_decode_YFSFid(&bp, &fid); + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + /* Was deleted if vnode->status.abort_code == VNOVNODE. */ + + xdr_decode_YFSVolSync(&bp, NULL); + return 0; +} + +/* + * YFS.RemoveFile2 operation type. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile2 = { + .name = "YFS.RemoveFile2", + .op = yfs_FS_RemoveFile2, + .deliver = yfs_deliver_fs_remove_file2, + .destructor = afs_flat_call_destructor, +}; + +/* + * Remove a file and retrieve new file status. + */ +int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + + call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEFILE2); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation. + */ +static int yfs_deliver_fs_remove(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + + xdr_decode_YFSVolSync(&bp, NULL); + return 0; +} + +/* + * FS.RemoveDir and FS.RemoveFile operation types. + */ +static const struct afs_call_type yfs_RXYFSRemoveFile = { + .name = "YFS.RemoveFile", + .op = yfs_FS_RemoveFile, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type yfs_RXYFSRemoveDir = { + .name = "YFS.RemoveDir", + .op = yfs_FS_RemoveDir, + .deliver = yfs_deliver_fs_remove, + .destructor = afs_flat_call_destructor, +}; + +/* + * remove a file or directory + */ +int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, bool isdir, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + call = afs_alloc_flat_call( + net, isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Link operation. + */ +static int yfs_deliver_fs_link(struct afs_call *call) +{ + struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Link operation type. + */ +static const struct afs_call_type yfs_RXYFSLink = { + .name = "YFS.Link", + .op = yfs_FS_Link, + .deliver = yfs_deliver_fs_link, + .destructor = afs_flat_call_destructor, +}; + +/* + * Make a hard link. + */ +int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, + const char *name, u64 current_data_version) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + size_t namesz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + call = afs_alloc_flat_call(net, &yfs_RXYFSLink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = vnode; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Symlink operation. + */ +static int yfs_deliver_fs_symlink(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFid(&bp, call->reply[1]); + ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL); + if (ret < 0) + return ret; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Symlink operation type + */ +static const struct afs_call_type yfs_RXYFSSymlink = { + .name = "YFS.Symlink", + .op = yfs_FS_Symlink, + .deliver = yfs_deliver_fs_symlink, + .destructor = afs_flat_call_destructor, +}; + +/* + * Create a symbolic link. + */ +int yfs_fs_symlink(struct afs_fs_cursor *fc, + const char *name, + const char *contents, + u64 current_data_version, + struct afs_fid *newfid, + struct afs_file_status *newstatus) +{ + struct afs_vnode *dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(dvnode); + size_t namesz, contents_sz; + __be32 *bp; + + _enter(""); + + namesz = strlen(name); + contents_sz = strlen(contents); + call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(namesz) + + xdr_strlen(contents_sz) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = dvnode; + call->reply[1] = newfid; + call->reply[2] = newstatus; + call->expected_version = current_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSYMLINK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvnode->fid); + bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_string(bp, contents, contents_sz); + bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.Rename operation. + */ +static int yfs_deliver_fs_rename(struct afs_call *call) +{ + struct afs_vnode *orig_dvnode = call->reply[0]; + struct afs_vnode *new_dvnode = call->reply[1]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + if (new_dvnode != orig_dvnode) { + ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL); + if (ret < 0) + return ret; + } + + xdr_decode_YFSVolSync(&bp, NULL); + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.Rename operation type + */ +static const struct afs_call_type yfs_RXYFSRename = { + .name = "FS.Rename", + .op = yfs_FS_Rename, + .deliver = yfs_deliver_fs_rename, + .destructor = afs_flat_call_destructor, +}; + +/* + * Rename a file or directory. + */ +int yfs_fs_rename(struct afs_fs_cursor *fc, + const char *orig_name, + struct afs_vnode *new_dvnode, + const char *new_name, + u64 current_orig_data_version, + u64 current_new_data_version) +{ + struct afs_vnode *orig_dvnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(orig_dvnode); + size_t o_namesz, n_namesz; + __be32 *bp; + + _enter(""); + + o_namesz = strlen(orig_name); + n_namesz = strlen(new_name); + call = afs_alloc_flat_call(net, &yfs_RXYFSRename, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(o_namesz) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(n_namesz), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = orig_dvnode; + call->reply[1] = new_dvnode; + call->expected_version = current_orig_data_version + 1; + call->expected_version_2 = current_new_data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRENAME); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid); + bp = xdr_encode_string(bp, orig_name, o_namesz); + bp = xdr_encode_YFSFid(bp, &new_dvnode->fid); + bp = xdr_encode_string(bp, new_name, n_namesz); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &orig_dvnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.StoreData64 operation. + */ +static int yfs_deliver_fs_store_data(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + afs_pages_written_back(vnode, call); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.StoreData64 operation type. + */ +static const struct afs_call_type yfs_RXYFSStoreData64 = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_fs_store_data, + .destructor = afs_flat_call_destructor, +}; + +/* + * Store a set of pages to a large file. + */ +int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + loff_t size, pos, i_size; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + size = (loff_t)to - (loff_t)offset; + if (first != last) + size += (loff_t)(last - first) << PAGE_SHIFT; + pos = (loff_t)first << PAGE_SHIFT; + pos += offset; + + i_size = i_size_read(&vnode->vfs_inode); + if (pos + size > i_size) + i_size = size + pos; + + _debug("size %llx, at %llx, i_size %llx", + (unsigned long long)size, (unsigned long long)pos, + (unsigned long long)i_size); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64, + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->mapping = mapping; + call->reply[0] = vnode; + call->first = first; + call->last = last; + call->first_offset = offset; + call->last_to = to; + call->send_pages = true; + call->expected_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime); + bp = xdr_encode_u64(bp, pos); + bp = xdr_encode_u64(bp, size); + bp = xdr_encode_u64(bp, i_size); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * deliver reply data to an FS.StoreStatus + */ +static int yfs_deliver_fs_store_status(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter(""); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.StoreStatus operation type + */ +static const struct afs_call_type yfs_RXYFSStoreStatus = { + .name = "YFS.StoreStatus", + .op = yfs_FS_StoreStatus, + .deliver = yfs_deliver_fs_store_status, + .destructor = afs_flat_call_destructor, +}; + +static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { + .name = "YFS.StoreData64", + .op = yfs_FS_StoreData64, + .deliver = yfs_deliver_fs_store_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set the attributes on a file, using YFS.StoreData64 rather than + * YFS.StoreStatus so as to alter the file size also. + */ +static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus) + + sizeof(struct yfs_xdr_u64) * 3, + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->expected_version = vnode->status.data_version + 1; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTOREDATA64); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + bp = xdr_encode_u64(bp, 0); /* position of start of write */ + bp = xdr_encode_u64(bp, 0); /* size of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Set the attributes on a file, using YFS.StoreData64 if there's a change in + * file size, and YFS.StoreStatus otherwise. + */ +int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + if (attr->ia_valid & ATTR_SIZE) + return yfs_fs_setattr_size(fc, attr); + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(struct yfs_xdr_YFSStoreStatus), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + call->expected_version = vnode->status.data_version; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSTORESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFS_StoreStatus(bp, attr); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to a YFS.GetVolumeStatus operation. + */ +static int yfs_deliver_fs_get_volume_status(struct afs_call *call) +{ + const __be32 *bp; + char *p; + u32 size; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->unmarshall++; + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); + + /* extract the returned status record */ + case 1: + _debug("extract status"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]); + call->unmarshall++; + afs_extract_to_tmp(call); + + /* extract the volume name length */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("volname length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_volname_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the volume name */ + case 3: + _debug("extract volname"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("volname '%s'", p); + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the offline message length */ + case 4: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("offline msg length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_offline_msg_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the offline message */ + case 5: + _debug("extract offline"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("offline '%s'", p); + + afs_extract_to_tmp(call); + call->unmarshall++; + + /* extract the message of the day length */ + case 6: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + call->count = ntohl(call->tmp); + _debug("motd length: %u", call->count); + if (call->count >= AFSNAMEMAX) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_motd_len); + size = (call->count + 3) & ~3; /* It's padded */ + afs_extract_begin(call, call->reply[2], size); + call->unmarshall++; + + /* extract the message of the day */ + case 7: + _debug("extract motd"); + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + p = call->reply[2]; + p[call->count] = 0; + _debug("motd '%s'", p); + + call->unmarshall++; + + case 8: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * Destroy a YFS.GetVolumeStatus call. + */ +static void yfs_get_volume_status_call_destructor(struct afs_call *call) +{ + kfree(call->reply[2]); + call->reply[2] = NULL; + afs_flat_call_destructor(call); +} + +/* + * YFS.GetVolumeStatus operation type + */ +static const struct afs_call_type yfs_RXYFSGetVolumeStatus = { + .name = "YFS.GetVolumeStatus", + .op = yfs_FS_GetVolumeStatus, + .deliver = yfs_deliver_fs_get_volume_status, + .destructor = yfs_get_volume_status_call_destructor, +}; + +/* + * fetch the status of a volume + */ +int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, + struct afs_volume_status *vs) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + void *tmpbuf; + + _enter(""); + + tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_u64), + sizeof(struct yfs_xdr_YFSFetchVolumeStatus) + + sizeof(__be32)); + if (!call) { + kfree(tmpbuf); + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = vnode; + call->reply[1] = vs; + call->reply[2] = tmpbuf; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_u64(bp, vnode->fid.vid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock + */ +static int yfs_deliver_fs_xxxx_lock(struct afs_call *call) +{ + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + _enter("{%u}", call->unmarshall); + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSVolSync(&bp, NULL); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.SetLock operation type + */ +static const struct afs_call_type yfs_RXYFSSetLock = { + .name = "YFS.SetLock", + .op = yfs_FS_SetLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ExtendLock operation type + */ +static const struct afs_call_type yfs_RXYFSExtendLock = { + .name = "YFS.ExtendLock", + .op = yfs_FS_ExtendLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * YFS.ReleaseLock operation type + */ +static const struct afs_call_type yfs_RXYFSReleaseLock = { + .name = "YFS.ReleaseLock", + .op = yfs_FS_ReleaseLock, + .deliver = yfs_deliver_fs_xxxx_lock, + .destructor = afs_flat_call_destructor, +}; + +/* + * Set a lock on a file + */ +int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid) + + sizeof(__be32), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSSETLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_u32(bp, type); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * extend a lock on a file + */ +int yfs_fs_extend_lock(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSEXTENDLOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * release a lock on a file + */ +int yfs_fs_release_lock(struct afs_fs_cursor *fc) +{ + struct afs_vnode *vnode = fc->vnode; + struct afs_call *call; + struct afs_net *net = afs_v2net(vnode); + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return -ENOMEM; + + call->key = fc->key; + call->reply[0] = vnode; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSRELEASELOCK); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &vnode->fid); + yfs_check_req(call, bp); + + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &vnode->fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an FS.FetchStatus with no vnode. + */ +static int yfs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_file_status *status = call->reply[1]; + struct afs_callback *callback = call->reply[2]; + struct afs_volsync *volsync = call->reply[3]; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + ret = yfs_decode_status(call, &bp, status, vnode, + &call->expected_version, NULL); + if (ret < 0) + return ret; + xdr_decode_YFSCallBack_raw(&bp, callback); + xdr_decode_YFSVolSync(&bp, volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* + * YFS.FetchStatus operation type + */ +static const struct afs_call_type yfs_RXYFSFetchStatus = { + .name = "YFS.FetchStatus", + .op = yfs_FS_FetchStatus, + .deliver = yfs_deliver_fs_fetch_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for a fid without needing a vnode handle. + */ +int yfs_fs_fetch_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fid, + struct afs_file_status *status, + struct afs_callback *callback, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + + _enter(",%x,{%llx:%llu},,", + key_serial(fc->key), fid->vid, fid->vnode); + + call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus, + sizeof(__be32) * 2 + + sizeof(struct yfs_xdr_YFSFid), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSCallBack) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = status; + call->reply[2] = callback; + call->reply[3] = volsync; + call->expected_version = 1; /* vnode->status.data_version */ + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSFETCHSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, fid); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, fid); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} + +/* + * Deliver reply data to an YFS.InlineBulkStatus call + */ +static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) +{ + struct afs_file_status *statuses; + struct afs_callback *callbacks; + struct afs_vnode *vnode = call->reply[0]; + const __be32 *bp; + u32 tmp; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + /* Extract the file status count and array in two steps */ + case 1: + _debug("extract status count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("status count: %u/%u", tmp, call->count2); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_count); + + call->count = 0; + call->unmarshall++; + more_counts: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); + + case 2: + _debug("extract status array %u", call->count); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + bp = call->buffer; + statuses = call->reply[1]; + ret = yfs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL); + if (ret < 0) + return ret; + + call->count++; + if (call->count < call->count2) + goto more_counts; + + call->count = 0; + call->unmarshall++; + afs_extract_to_tmp(call); + + /* Extract the callback count and array in two steps */ + case 3: + _debug("extract CB count"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + tmp = ntohl(call->tmp); + _debug("CB count: %u", tmp); + if (tmp != call->count2) + return afs_protocol_error(call, -EBADMSG, + afs_eproto_ibulkst_cb_count); + call->count = 0; + call->unmarshall++; + more_cbs: + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); + + case 4: + _debug("extract CB array"); + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + _debug("unmarshall CB array"); + bp = call->buffer; + callbacks = call->reply[2]; + xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]); + statuses = call->reply[1]; + if (call->count == 0 && vnode && statuses[0].abort_code == 0) { + bp = call->buffer; + xdr_decode_YFSCallBack(call, vnode, &bp); + } + call->count++; + if (call->count < call->count2) + goto more_cbs; + + afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); + call->unmarshall++; + + case 5: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + bp = call->buffer; + xdr_decode_YFSVolSync(&bp, call->reply[3]); + + call->unmarshall++; + + case 6: + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +/* + * FS.InlineBulkStatus operation type + */ +static const struct afs_call_type yfs_RXYFSInlineBulkStatus = { + .name = "YFS.InlineBulkStatus", + .op = yfs_FS_InlineBulkStatus, + .deliver = yfs_deliver_fs_inline_bulk_status, + .destructor = afs_flat_call_destructor, +}; + +/* + * Fetch the status information for up to 1024 files + */ +int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, + struct afs_net *net, + struct afs_fid *fids, + struct afs_file_status *statuses, + struct afs_callback *callbacks, + unsigned int nr_fids, + struct afs_volsync *volsync) +{ + struct afs_call *call; + __be32 *bp; + int i; + + _enter(",%x,{%llx:%llu},%u", + key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + + call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus, + sizeof(__be32) + + sizeof(__be32) + + sizeof(__be32) + + sizeof(struct yfs_xdr_YFSFid) * nr_fids, + sizeof(struct yfs_xdr_YFSFetchStatus)); + if (!call) { + fc->ac.error = -ENOMEM; + return -ENOMEM; + } + + call->key = fc->key; + call->reply[0] = NULL; /* vnode for fid[0] */ + call->reply[1] = statuses; + call->reply[2] = callbacks; + call->reply[3] = volsync; + call->count2 = nr_fids; + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS); + bp = xdr_encode_u32(bp, 0); /* RPCFlags */ + bp = xdr_encode_u32(bp, nr_fids); + for (i = 0; i < nr_fids; i++) + bp = xdr_encode_YFSFid(bp, &fids[i]); + yfs_check_req(call, bp); + + call->cb_break = fc->cb_break; + afs_use_fs_server(call, fc->cbi); + trace_afs_make_fs_call(call, &fids[0]); + return afs_make_call(&fc->ac, call, GFP_NOFS, false); +} diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8acd56e20a37..ed155042236b 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -54,6 +54,35 @@ enum afs_fs_operation { afs_FS_StoreData64 = 65538, /* AFS Store file data */ afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */ afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */ + + yfs_FS_FetchData = 130, /* YFS Fetch file data */ + yfs_FS_FetchACL = 64131, /* YFS Fetch file ACL */ + yfs_FS_FetchStatus = 64132, /* YFS Fetch file status */ + yfs_FS_StoreACL = 64134, /* YFS Store file ACL */ + yfs_FS_StoreStatus = 64135, /* YFS Store file status */ + yfs_FS_RemoveFile = 64136, /* YFS Remove a file */ + yfs_FS_CreateFile = 64137, /* YFS Create a file */ + yfs_FS_Rename = 64138, /* YFS Rename or move a file or directory */ + yfs_FS_Symlink = 64139, /* YFS Create a symbolic link */ + yfs_FS_Link = 64140, /* YFS Create a hard link */ + yfs_FS_MakeDir = 64141, /* YFS Create a directory */ + yfs_FS_RemoveDir = 64142, /* YFS Remove a directory */ + yfs_FS_GetVolumeStatus = 64149, /* YFS Get volume status information */ + yfs_FS_SetVolumeStatus = 64150, /* YFS Set volume status information */ + yfs_FS_SetLock = 64156, /* YFS Request a file lock */ + yfs_FS_ExtendLock = 64157, /* YFS Extend a file lock */ + yfs_FS_ReleaseLock = 64158, /* YFS Release a file lock */ + yfs_FS_Lookup = 64161, /* YFS lookup file in directory */ + yfs_FS_FlushCPS = 64165, + yfs_FS_FetchOpaqueACL = 64168, + yfs_FS_WhoAmI = 64170, + yfs_FS_RemoveACL = 64171, + yfs_FS_RemoveFile2 = 64173, + yfs_FS_StoreOpaqueACL2 = 64174, + yfs_FS_InlineBulkStatus = 64536, /* YFS Fetch multiple file statuses with errors */ + yfs_FS_FetchData64 = 64537, /* YFS Fetch file data */ + yfs_FS_StoreData64 = 64538, /* YFS Store file data */ + yfs_FS_UpdateSymlink = 64540, }; enum afs_vl_operation { @@ -156,7 +185,34 @@ enum afs_file_error { EM(afs_FS_FetchData64, "FS.FetchData64") \ EM(afs_FS_StoreData64, "FS.StoreData64") \ EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \ - E_(afs_FS_GetCapabilities, "FS.GetCapabilities") + EM(afs_FS_GetCapabilities, "FS.GetCapabilities") \ + EM(yfs_FS_FetchACL, "YFS.FetchACL") \ + EM(yfs_FS_FetchStatus, "YFS.FetchStatus") \ + EM(yfs_FS_StoreACL, "YFS.StoreACL") \ + EM(yfs_FS_StoreStatus, "YFS.StoreStatus") \ + EM(yfs_FS_RemoveFile, "YFS.RemoveFile") \ + EM(yfs_FS_CreateFile, "YFS.CreateFile") \ + EM(yfs_FS_Rename, "YFS.Rename") \ + EM(yfs_FS_Symlink, "YFS.Symlink") \ + EM(yfs_FS_Link, "YFS.Link") \ + EM(yfs_FS_MakeDir, "YFS.MakeDir") \ + EM(yfs_FS_RemoveDir, "YFS.RemoveDir") \ + EM(yfs_FS_GetVolumeStatus, "YFS.GetVolumeStatus") \ + EM(yfs_FS_SetVolumeStatus, "YFS.SetVolumeStatus") \ + EM(yfs_FS_SetLock, "YFS.SetLock") \ + EM(yfs_FS_ExtendLock, "YFS.ExtendLock") \ + EM(yfs_FS_ReleaseLock, "YFS.ReleaseLock") \ + EM(yfs_FS_Lookup, "YFS.Lookup") \ + EM(yfs_FS_FlushCPS, "YFS.FlushCPS") \ + EM(yfs_FS_FetchOpaqueACL, "YFS.FetchOpaqueACL") \ + EM(yfs_FS_WhoAmI, "YFS.WhoAmI") \ + EM(yfs_FS_RemoveACL, "YFS.RemoveACL") \ + EM(yfs_FS_RemoveFile2, "YFS.RemoveFile2") \ + EM(yfs_FS_StoreOpaqueACL2, "YFS.StoreOpaqueACL2") \ + EM(yfs_FS_InlineBulkStatus, "YFS.InlineBulkStatus") \ + EM(yfs_FS_FetchData64, "YFS.FetchData64") \ + EM(yfs_FS_StoreData64, "YFS.StoreData64") \ + E_(yfs_FS_UpdateSymlink, "YFS.UpdateSymlink") #define afs_vl_operations \ EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \ From 744bcd713a4eabb248246f7deccfad30c579b7f5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:58 +0100 Subject: [PATCH 22/26] afs: Allow dumping of server cursor on operation failure Provide an option to allow the file or volume location server cursor to be dumped if the rotation routine falls off the end without managing to contact a server. Signed-off-by: David Howells --- fs/afs/Kconfig | 12 ++++++++++ fs/afs/addr_list.c | 2 ++ fs/afs/internal.h | 3 +++ fs/afs/rotate.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ fs/afs/vl_rotate.c | 53 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+) diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index ebba3b18e5da..701aaa9b1899 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -27,3 +27,15 @@ config AFS_FSCACHE help Say Y here if you want AFS data to be cached locally on disk through the generic filesystem cache manager + +config AFS_DEBUG_CURSOR + bool "AFS server cursor debugging" + depends on AFS_FS + help + Say Y here to cause the contents of a server cursor to be dumped to + the dmesg log if the server rotation algorithm fails to successfully + contact a server. + + See for more information. + + If unsure, say N. diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 3f60b4012587..bc5ce31a4ae4 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -358,6 +358,8 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac) if (!ac->alist) return false; + ac->nr_iterations++; + if (ac->begun) { ac->index++; if (ac->index == ac->alist->nr_addrs) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ce79bd514331..ac9da1e4050e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -660,6 +660,7 @@ struct afs_addr_cursor { short error; bool begun; /* T if we've begun iteration */ bool responded; /* T if the current address responded */ + unsigned short nr_iterations; /* Number of address iterations */ }; /* @@ -677,6 +678,7 @@ struct afs_vl_cursor { #define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ #define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */ #define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */ + unsigned short nr_iterations; /* Number of server iterations */ }; /* @@ -700,6 +702,7 @@ struct afs_fs_cursor { #define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ #define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ #define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ + unsigned short nr_iterations; /* Number of server iterations */ }; /* diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 41405dde0113..7c4487781637 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -156,6 +156,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) return false; } + fc->nr_iterations++; + /* Evaluate the result of the previous operation, if there was one. */ switch (error) { case SHRT_MAX: @@ -519,6 +521,56 @@ iterate_address: return false; } +/* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) +{ + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + + pr_notice("EDESTADDR occurred\n"); + pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", + fc->cb_break, fc->cb_break_2, fc->flags, fc->error); + pr_notice("FC: st=%u ix=%u ni=%u\n", + fc->start, fc->index, fc->nr_iterations); + + if (fc->server_list) { + const struct afs_server_list *sl = fc->server_list; + pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n", + sl->nr_servers, sl->index, sl->vnovol_mask); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_server *s = sl->servers[i].server; + pr_notice("FC: server fl=%lx av=%u %pU\n", + s->flags, s->addr_version, &s->uuid); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("FC: - av=%u nr=%u/%u/%u ax=%u\n", + a->version, + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->index); + pr_notice("FC: - pr=%lx yf=%lx\n", + a->probed, a->yfs); + if (a == fc->ac.alist) + pr_notice("FC: - current\n"); + } + } + } + + pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n", + fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error, + fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations); + + rcu_read_unlock(); +} + /* * Tidy up a filesystem cursor and unlock the vnode. */ @@ -526,6 +578,11 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc) { struct afs_net *net = afs_v2net(fc->vnode); + if (fc->error == -EDESTADDRREQ || + fc->error == -ENETUNREACH || + fc->error == -EHOSTUNREACH) + afs_dump_edestaddrreq(fc); + mutex_unlock(&fc->vnode->io_lock); afs_end_cursor(&fc->ac); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 44a936ad9c7a..5b99ea7be194 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -83,6 +83,8 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) return false; } + vc->nr_iterations++; + /* Evaluate the result of the previous operation, if there was one. */ switch (error) { case SHRT_MAX: @@ -234,6 +236,52 @@ failed: return false; } +/* + * Dump cursor state in the case of the error being EDESTADDRREQ. + */ +static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) +{ + static int count; + int i; + + if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) + return; + count++; + + rcu_read_lock(); + pr_notice("EDESTADDR occurred\n"); + pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n", + vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error); + + if (vc->server_list) { + const struct afs_vlserver_list *sl = vc->server_list; + pr_notice("VC: SL nr=%u ix=%u\n", + sl->nr_servers, sl->index); + for (i = 0; i < sl->nr_servers; i++) { + const struct afs_vlserver *s = sl->servers[i].server; + pr_notice("VC: server fl=%lx %s+%hu\n", + s->flags, s->name, s->port); + if (s->addresses) { + const struct afs_addr_list *a = + rcu_dereference(s->addresses); + pr_notice("VC: - av=%u nr=%u/%u/%u ax=%u\n", + a->version, + a->nr_ipv4, a->nr_addrs, a->max_addrs, + a->index); + pr_notice("VC: - pr=%lx yf=%lx\n", + a->probed, a->yfs); + if (a == vc->ac.alist) + pr_notice("VC: - current\n"); + } + } + } + + pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n", + vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error, + vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations); + rcu_read_unlock(); +} + /* * Tidy up a volume location server cursor and unlock the vnode. */ @@ -241,6 +289,11 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc) { struct afs_net *net = vc->cell->net; + if (vc->error == -EDESTADDRREQ || + vc->error == -ENETUNREACH || + vc->error == -EHOSTUNREACH) + afs_vl_dump_edestaddrreq(vc); + afs_end_cursor(&vc->ac); afs_put_vlserverlist(net, vc->server_list); From 2feeaf8433c8e68de3d0a06a0ffe7742bcd13c1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:59 +0100 Subject: [PATCH 23/26] afs: Eliminate the address pointer from the address list cursor Eliminate the address pointer from the address list cursor as it's redundant (ac->addrs[ac->index] can be used to find the same address) and address lists must be replaced rather than being rearranged, so is of limited value. Signed-off-by: David Howells --- fs/afs/addr_list.c | 2 -- fs/afs/internal.h | 1 - fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 2 -- fs/afs/vl_rotate.c | 2 +- fs/afs/volume.c | 6 +++--- 6 files changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index bc5ce31a4ae4..1536d1d21c33 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -371,7 +371,6 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac) ac->begun = true; ac->responded = false; - ac->addr = &ac->alist->addrs[ac->index]; return true; } @@ -389,7 +388,6 @@ int afs_end_cursor(struct afs_addr_cursor *ac) afs_put_addrlist(alist); } - ac->addr = NULL; ac->alist = NULL; ac->begun = false; return ac->error; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ac9da1e4050e..e5b596bd8acf 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -653,7 +653,6 @@ struct afs_interface { */ struct afs_addr_cursor { struct afs_addr_list *alist; /* Current address list (pins ref) */ - struct sockaddr_rxrpc *addr; u32 abort_code; unsigned short start; /* Starting point in alist->addrs[] */ unsigned short index; /* Wrapping offset from start to current addr */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 444ba0d511ef..42e1ea7372e9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -359,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp, bool async) { - struct sockaddr_rxrpc *srx = ac->addr; + struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index]; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; diff --git a/fs/afs/server.c b/fs/afs/server.c index aa35cfae5440..7c1be8b4dc9a 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -367,7 +367,6 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) .alist = alist, .start = alist->index, .index = 0, - .addr = &alist->addrs[alist->index], .error = 0, }; _enter("%p", server); @@ -518,7 +517,6 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) _enter(""); - fc->ac.addr = NULL; fc->ac.start = READ_ONCE(fc->ac.alist->index); fc->ac.index = fc->ac.start; fc->ac.error = 0; diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 5b99ea7be194..ead6dedbb561 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -209,7 +209,7 @@ iterate_address: if (!afs_iterate_addresses(&vc->ac)) goto next_server; - _leave(" = t %pISpc", &vc->ac.addr->transport); + _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); return true; next_server: diff --git a/fs/afs/volume.c b/fs/afs/volume.c index f0020e35bf6f..7527c081726e 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -88,16 +88,16 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, case VL_SERVICE: clear_bit(vc.ac.index, &vc.ac.alist->yfs); set_bit(vc.ac.index, &vc.ac.alist->probed); - vc.ac.addr->srx_service = ret; + vc.ac.alist->addrs[vc.ac.index].srx_service = ret; break; case YFS_VL_SERVICE: set_bit(vc.ac.index, &vc.ac.alist->yfs); set_bit(vc.ac.index, &vc.ac.alist->probed); - vc.ac.addr->srx_service = ret; + vc.ac.alist->addrs[vc.ac.index].srx_service = ret; break; } } - + vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); } From 18ac61853cc4e44eb30e125fc8344a3b25c7b6fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:59 +0100 Subject: [PATCH 24/26] afs: Fix callback handling In some circumstances, the callback interest pointer is NULL, so in such a case we can't dereference it when checking to see if the callback is broken. This causes an oops in some circumstances. Fix this by replacing the function that worked out the aggregate break counter with one that actually does the comparison, and then make that return true (ie. broken) if there is no callback interest as yet (ie. the pointer is NULL). Fixes: 68251f0a6818 ("afs: Fix whole-volume callback handling") Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- fs/afs/internal.h | 9 ++++++--- fs/afs/security.c | 7 ++++--- fs/afs/yfsclient.c | 2 +- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 3975969719de..7c75a1813321 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -269,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { + if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e5b596bd8acf..b60d15212975 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -776,10 +776,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; } -static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode, - struct afs_cb_interest *cbi) +static inline bool afs_cb_is_broken(unsigned int cb_break, + const struct afs_vnode *vnode, + const struct afs_cb_interest *cbi) { - return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break; + return !cbi || cb_break != (vnode->cb_break + + cbi->server->cb_s_break + + vnode->volume->cb_v_break); } /* diff --git a/fs/afs/security.c b/fs/afs/security.c index d1ae53fd3739..5f58a9a17e69 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) { + if (afs_cb_is_broken(cb_break, vnode, + vnode->cb_interest)) { changed = true; break; } @@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) + if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -256,7 +257,7 @@ found: spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) && + if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index d5e3f0095040..12658c1363ae 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -324,7 +324,7 @@ static void xdr_decode_YFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { + if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) { cb_expiry = xdr_to_u64(xdr->expiration_time); do_div(cb_expiry, 10 * 1000 * 1000); vnode->cb_version = ntohl(xdr->version); From 3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 20 Oct 2018 00:57:59 +0100 Subject: [PATCH 25/26] afs: Probe multiple fileservers simultaneously Send probes to all the unprobed fileservers in a fileserver list on all addresses simultaneously in an attempt to find out the fastest route whilst not getting stuck for 20s on any server or address that we don't get a reply from. This alleviates the problem whereby attempting to access a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds. Signed-off-by: David Howells --- fs/afs/Makefile | 4 +- fs/afs/addr_list.c | 40 ++++-- fs/afs/cmservice.c | 129 +++++++++++++----- fs/afs/fs_probe.c | 270 ++++++++++++++++++++++++++++++++++++ fs/afs/fsclient.c | 27 ++-- fs/afs/internal.h | 98 ++++++++++--- fs/afs/proc.c | 6 +- fs/afs/rotate.c | 174 +++++++++++++++-------- fs/afs/rxrpc.c | 44 +++--- fs/afs/server.c | 109 +-------------- fs/afs/server_list.c | 6 +- fs/afs/vl_list.c | 6 +- fs/afs/vl_probe.c | 273 +++++++++++++++++++++++++++++++++++++ fs/afs/vl_rotate.c | 159 +++++++++++++-------- fs/afs/vlclient.c | 35 +++-- fs/afs/volume.c | 16 --- include/trace/events/afs.h | 4 +- 17 files changed, 1050 insertions(+), 350 deletions(-) create mode 100644 fs/afs/fs_probe.c create mode 100644 fs/afs/vl_probe.c diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cc942b790cff..0738e2bf5193 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -17,6 +17,7 @@ kafs-y := \ file.o \ flock.o \ fsclient.o \ + fs_probe.o \ inode.o \ main.o \ misc.o \ @@ -29,8 +30,9 @@ kafs-y := \ super.o \ netdevices.o \ vlclient.o \ - vl_rotate.o \ vl_list.o \ + vl_probe.o \ + vl_rotate.o \ volume.o \ write.o \ xattr.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 1536d1d21c33..967db336d11a 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin); srx->transport.sin.sin_family = AF_INET; srx->transport.sin.sin_port = htons(port); @@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); srx = &alist->addrs[i]; + srx->srx_family = AF_RXRPC; + srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); srx->transport.sin6.sin6_family = AF_INET6; srx->transport.sin6.sin6_port = htons(port); @@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) */ bool afs_iterate_addresses(struct afs_addr_cursor *ac) { - _enter("%hu+%hd", ac->start, (short)ac->index); + unsigned long set, failed; + int index; if (!ac->alist) return false; + set = ac->alist->responded; + failed = ac->alist->failed; + _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index); + ac->nr_iterations++; - if (ac->begun) { - ac->index++; - if (ac->index == ac->alist->nr_addrs) - ac->index = 0; + set &= ~(failed | ac->tried); - if (ac->index == ac->start) - return false; - } + if (!set) + return false; - ac->begun = true; + index = READ_ONCE(ac->alist->preferred); + if (test_bit(index, &set)) + goto selected; + + index = __ffs(set); + +selected: + ac->index = index; + set_bit(index, &ac->tried); ac->responded = false; return true; } @@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac) alist = ac->alist; if (alist) { - if (ac->responded && ac->index != ac->start) - WRITE_ONCE(alist->index, ac->index); + if (ac->responded && + ac->index != alist->preferred && + test_bit(ac->alist->preferred, &ac->tried)) + WRITE_ONCE(alist->preferred, ac->index); afs_put_addrlist(alist); + ac->alist = NULL; } - ac->alist = NULL; - ac->begun = false; return ac->error; } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 8cf8d10daa6c..8ee5972893ed 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call) { _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); + call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall); + switch (call->operation_ID) { case CBCallBack: call->type = &afs_SRXCBCallBack; @@ -151,6 +153,91 @@ bool afs_cm_incoming_call(struct afs_call *call) } } +/* + * Record a probe to the cache manager from a server. + */ +static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) +{ + _enter(""); + + if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) && + !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) { + if (server->cm_epoch == call->epoch) + return 0; + + if (!server->probe.said_rebooted) { + pr_notice("kAFS: FS rebooted %pU\n", &server->uuid); + server->probe.said_rebooted = true; + } + } + + spin_lock(&server->probe_lock); + + if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { + server->cm_epoch = call->epoch; + server->probe.cm_epoch = call->epoch; + goto out; + } + + if (server->probe.cm_probed && + call->epoch != server->probe.cm_epoch && + !server->probe.said_inconsistent) { + pr_notice("kAFS: FS endpoints inconsistent %pU\n", + &server->uuid); + server->probe.said_inconsistent = true; + } + + if (!server->probe.cm_probed || call->epoch == server->cm_epoch) + server->probe.cm_epoch = server->cm_epoch; + +out: + server->probe.cm_probed = true; + spin_unlock(&server->probe_lock); + return 0; +} + +/* + * Find the server record by peer address and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_peer(struct afs_call *call) +{ + struct sockaddr_rxrpc srx; + struct afs_server *server; + + rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); + + server = afs_find_server(call->net, &srx); + if (!server) { + trace_afs_cm_no_server(call, &srx); + return 0; + } + + call->cm_server = server; + return afs_record_cm_probe(call, server); +} + +/* + * Find the server record by server UUID and record a probe to the cache + * manager from a server. + */ +static int afs_find_cm_server_by_uuid(struct afs_call *call, + struct afs_uuid *uuid) +{ + struct afs_server *server; + + rcu_read_lock(); + server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); + if (!server) { + trace_afs_cm_no_server_u(call, call->request); + return 0; + } + + call->cm_server = server; + return afs_record_cm_probe(call, server); +} + /* * Clean up a cache manager call. */ @@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work) static int afs_deliver_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; - struct sockaddr_rxrpc srx; __be32 *bp; int ret, loop; @@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - call->cm_server = afs_find_server(call->net, &srx); - if (!call->cm_server) - trace_afs_cm_no_server(call, &srx); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -305,13 +386,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) */ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { - struct sockaddr_rxrpc srx; int ret; _enter(""); - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - afs_extract_discard(call, 0); ret = afs_extract_data(call, false); if (ret < 0) @@ -319,11 +397,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - call->cm_server = afs_find_server(call->net, &srx); - if (!call->cm_server) - trace_afs_cm_no_server(call, &srx); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rcu_read_lock(); - call->cm_server = afs_find_server_by_uuid(call->net, call->request); - rcu_read_unlock(); - if (!call->cm_server) - trace_afs_cm_no_server_u(call, call->request); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_uuid(call, call->request); } /* @@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_uuid(call, call->request); } /* @@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } /* @@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) static int afs_deliver_yfs_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; - struct sockaddr_rxrpc srx; struct yfs_xdr_YFSFid *bp; size_t size; int ret, loop; @@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) /* We'll need the file server record as that tells us which set of * vnodes to operate upon. */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - call->cm_server = afs_find_server(call->net, &srx); - if (!call->cm_server) - trace_afs_cm_no_server(call, &srx); - - return afs_queue_call_work(call); + return afs_find_cm_server_by_peer(call); } diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c new file mode 100644 index 000000000000..d049cb459742 --- /dev/null +++ b/fs/afs/fs_probe.c @@ -0,0 +1,270 @@ +/* AFS fileserver probing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static bool afs_fs_probe_done(struct afs_server *server) +{ + if (!atomic_dec_and_test(&server->probe_outstanding)) + return false; + + wake_up_var(&server->probe_outstanding); + clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags); + wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING); + return true; +} + +/* + * Process the result of probing a fileserver. This is called after successful + * or failed delivery of an FS.GetCapabilities operation. + */ +void afs_fileserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_server *server = call->reply[0]; + unsigned int server_index = (long)call->reply[1]; + unsigned int index = call->addr_ix; + unsigned int rtt = UINT_MAX; + bool have_result = false; + u64 _rtt; + int ret = call->error; + + _enter("%pU,%u", &server->uuid, index); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!server->probe.responded) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + server->probe.local_failure = true; + afs_io_error(call, afs_io_error_fs_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!server->probe.responded && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + afs_io_error(call, afs_io_error_fs_probe_fail); + goto out; + } + +responded: + set_bit(index, &alist->responded); + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_FS_SERVICE) { + server->probe.is_yfs = true; + set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.not_yfs = true; + if (!server->probe.is_yfs) { + clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + /* Get the RTT and scale it to fit into a 32-bit value that represents + * over a minute of time so that we can access it with one instruction + * on a 32-bit system. + */ + _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); + _rtt /= 64; + rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; + if (rtt < server->probe.rtt) { + server->probe.rtt = rtt; + alist->preferred = index; + have_result = true; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.responded = true; + set_bit(AFS_SERVER_FL_PROBED, &server->flags); +out: + spin_unlock(&server->probe_lock); + + _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", + server_index, index, &alist->addrs[index].transport, + (unsigned int)rtt, ret); + + have_result |= afs_fs_probe_done(server); + if (have_result) { + server->probe.have_result = true; + wake_up_var(&server->probe.have_result); + wake_up_all(&server->probe_wq); + } +} + +/* + * Probe all of a fileserver's addresses to find out the best route and to + * query its capabilities. + */ +static int afs_do_probe_fileserver(struct afs_net *net, + struct afs_server *server, + struct key *key, + unsigned int server_index) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + int ret; + + _enter("%pU", &server->uuid); + + read_lock(&server->fs_lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->fs_lock)); + read_unlock(&server->fs_lock); + + atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { + ret = afs_fs_get_capabilities(net, server, &ac, key, server_index, + true); + if (ret != -EINPROGRESS) { + afs_fs_probe_done(server); + return ret; + } + } + + return 0; +} + +/* + * Send off probes to all unprobed servers. + */ +int afs_probe_fileservers(struct afs_net *net, struct key *key, + struct afs_server_list *list) +{ + struct afs_server *server; + int i, ret; + + for (i = 0; i < list->nr_servers; i++) { + server = list->servers[i].server; + if (test_bit(AFS_SERVER_FL_PROBED, &server->flags)) + continue; + + if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) { + ret = afs_do_probe_fileserver(net, server, key, i); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * Wait for the first as-yet untried fileserver to respond. + */ +int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_server *server; + unsigned int rtt = UINT_MAX; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", slist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + __clear_bit(i, &untried); + if (server->probe.responded) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (server->probe.responded) + goto stop; + if (test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + still_probing = true; + } + } + + if (!still_probing || unlikely(signal_pending(current))) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < slist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = slist->servers[i].server; + if (server->probe.responded && + server->probe.rtt < rtt) { + pref = i; + rtt = server->probe.rtt; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + slist->preferred = pref; + return 0; +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 7c75a1813321..ca08c83168f5 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -2006,7 +2006,6 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, */ static int afs_deliver_fs_get_capabilities(struct afs_call *call) { - struct afs_server *server = call->reply[0]; u32 count; int ret; @@ -2042,15 +2041,18 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) break; } - if (call->service_id == YFS_FS_SERVICE) - set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); - else - clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); - _leave(" = 0 [done]"); return 0; } +static void afs_destroy_fs_get_capabilities(struct afs_call *call) +{ + struct afs_server *server = call->reply[0]; + + afs_put_server(call->net, server); + afs_flat_call_destructor(call); +} + /* * FS.GetCapabilities operation type */ @@ -2058,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { .name = "FS.GetCapabilities", .op = afs_FS_GetCapabilities, .deliver = afs_deliver_fs_get_capabilities, - .destructor = afs_flat_call_destructor, + .done = afs_fileserver_probe_result, + .destructor = afs_destroy_fs_get_capabilities, }; /* @@ -2068,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { int afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, struct afs_addr_cursor *ac, - struct key *key) + struct key *key, + unsigned int server_index, + bool async) { struct afs_call *call; __be32 *bp; @@ -2080,8 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net, return -ENOMEM; call->key = key; - call->reply[0] = server; + call->reply[0] = afs_get_server(server); + call->reply[1] = (void *)(long)server_index; call->upgrade = true; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -2089,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net, /* Can't take a ref on server */ trace_afs_make_fs_call(call, NULL); - return afs_make_call(ac, call, GFP_NOFS, false); + return afs_make_call(ac, call, GFP_NOFS, async); } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index b60d15212975..5da3b09b7518 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -76,12 +76,13 @@ struct afs_addr_list { u32 version; /* Version */ unsigned char max_addrs; unsigned char nr_addrs; - unsigned char index; /* Address currently in use */ + unsigned char preferred; /* Preferred address */ unsigned char nr_ipv4; /* Number of IPv4 addresses */ enum dns_record_source source:8; enum dns_lookup_status status:8; unsigned long probed; /* Mask of servers that have been probed */ - unsigned long yfs; /* Mask of servers that are YFS */ + unsigned long failed; /* Mask of addrs that failed locally/ICMP */ + unsigned long responded; /* Mask of addrs that responded */ struct sockaddr_rxrpc addrs[]; #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) }; @@ -91,6 +92,7 @@ struct afs_addr_list { */ struct afs_call { const struct afs_call_type *type; /* type of call */ + struct afs_addr_list *alist; /* Address is alist[addr_ix] */ wait_queue_head_t waitq; /* processes awaiting completion */ struct work_struct async_work; /* async I/O processor */ struct work_struct work; /* actual work processor */ @@ -116,6 +118,7 @@ struct afs_call { spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ + u32 epoch; unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ @@ -125,13 +128,14 @@ struct afs_call { unsigned count2; /* count used in unmarshalling */ }; unsigned char unmarshall; /* unmarshalling phase */ + unsigned char addr_ix; /* Address in ->alist */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ - bool want_reply_time; /* T if want reply_time */ + bool want_reply_time; /* T if want reply_time */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -162,6 +166,9 @@ struct afs_call_type { /* Work function */ void (*work)(struct work_struct *work); + + /* Call done function (gets called immediately on success or failure) */ + void (*done)(struct afs_call *call); }; /* @@ -376,10 +383,27 @@ struct afs_vlserver { unsigned long flags; #define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ #define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ +#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */ rwlock_t lock; /* Lock on addresses */ atomic_t usage; - u16 name_len; /* Length of name */ + + /* Probe state */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT as ktime/64 */ + u32 abort_code; + short error; + bool have_result; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + } probe; + u16 port; + u16 name_len; /* Length of name */ char name[]; /* Server name, case-flattened */ }; @@ -399,6 +423,7 @@ struct afs_vlserver_list { atomic_t usage; u8 nr_servers; u8 index; /* Server currently in use */ + u8 preferred; /* Preferred server */ enum dns_record_source source:8; enum dns_lookup_status status:8; rwlock_t lock; @@ -461,8 +486,10 @@ struct afs_server { #define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ #define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ #define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ +#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */ atomic_t usage; u32 addr_version; /* Address list version */ + u32 cm_epoch; /* Server RxRPC epoch */ /* file service access */ rwlock_t fs_lock; /* access lock */ @@ -471,6 +498,26 @@ struct afs_server { struct hlist_head cb_volumes; /* List of volume interests on this server */ unsigned cb_s_break; /* Break-everything counter. */ rwlock_t cb_break_lock; /* Volume finding lock */ + + /* Probe state */ + wait_queue_head_t probe_wq; + atomic_t probe_outstanding; + spinlock_t probe_lock; + struct { + unsigned int rtt; /* RTT as ktime/64 */ + u32 abort_code; + u32 cm_epoch; + short error; + bool have_result; + bool responded:1; + bool is_yfs:1; + bool not_yfs:1; + bool local_failure:1; + bool no_epoch:1; + bool cm_probed:1; + bool said_rebooted:1; + bool said_inconsistent:1; + } probe; }; /* @@ -505,8 +552,8 @@ struct afs_server_entry { struct afs_server_list { refcount_t usage; - unsigned short nr_servers; - unsigned short index; /* Server currently in use */ + unsigned char nr_servers; + unsigned char preferred; /* Preferred server */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ rwlock_t lock; @@ -653,13 +700,12 @@ struct afs_interface { */ struct afs_addr_cursor { struct afs_addr_list *alist; /* Current address list (pins ref) */ - u32 abort_code; - unsigned short start; /* Starting point in alist->addrs[] */ - unsigned short index; /* Wrapping offset from start to current addr */ - short error; - bool begun; /* T if we've begun iteration */ + unsigned long tried; /* Tried addresses */ + signed char index; /* Current address */ bool responded; /* T if the current address responded */ unsigned short nr_iterations; /* Number of address iterations */ + short error; + u32 abort_code; }; /* @@ -669,9 +715,10 @@ struct afs_vl_cursor { struct afs_addr_cursor ac; struct afs_cell *cell; /* The cell we're querying */ struct afs_vlserver_list *server_list; /* Current server list (pins ref) */ + struct afs_vlserver *server; /* Server on which this resides */ struct key *key; /* Key for the server */ - unsigned char start; /* Initial index in server list */ - unsigned char index; /* Number of servers tried beyond start */ + unsigned long untried; /* Bitmask of untried servers */ + short index; /* Current server */ short error; unsigned short flags; #define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */ @@ -689,10 +736,10 @@ struct afs_fs_cursor { struct afs_server_list *server_list; /* Current server list (pins ref) */ struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ struct key *key; /* Key for the server */ + unsigned long untried; /* Bitmask of untried servers */ unsigned int cb_break; /* cb_break + cb_s_break before the call */ unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ - unsigned char start; /* Initial index in server list */ - unsigned char index; /* Number of servers tried beyond start */ + short index; /* Current server */ short error; unsigned short flags; #define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ @@ -888,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *); extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *, - struct afs_addr_cursor *, struct key *); + struct afs_addr_cursor *, struct key *, unsigned int, bool); extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, struct afs_fid *, struct afs_file_status *, struct afs_callback *, unsigned int, @@ -897,6 +944,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, struct afs_fid *, struct afs_file_status *, struct afs_callback *, struct afs_volsync *); +/* + * fs_probe.c + */ +extern void afs_fileserver_probe_result(struct afs_call *); +extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *); +extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); + /* * inode.c */ @@ -1013,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *); extern void __net_exit afs_close_socket(struct afs_net *); extern void afs_charge_preallocation(struct work_struct *); extern void afs_put_call(struct afs_call *); -extern int afs_queue_call_work(struct afs_call *); extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool); extern struct afs_call *afs_alloc_flat_call(struct afs_net *, const struct afs_call_type *, @@ -1130,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *); extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); -extern bool afs_probe_fileserver(struct afs_fs_cursor *); extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); /* @@ -1160,9 +1212,17 @@ extern void afs_fs_exit(void); extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *, const char *, int); extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *); -extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *); +extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *, + struct afs_vlserver *, unsigned int, bool); extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); +/* + * vl_probe.c + */ +extern void afs_vlserver_probe_result(struct afs_call *); +extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *); +extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long); + /* * vl_rotate.c */ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index d887f822f4eb..be2ee3bbd0a9 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -312,7 +312,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) if (alist) { for (i = 0; i < alist->nr_addrs; i++) seq_printf(m, " %c %pISpc\n", - alist->index == i ? '>' : '-', + alist->preferred == i ? '>' : '-', &alist->addrs[i].transport); } return 0; @@ -391,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) &server->uuid, atomic_read(&server->usage), &alist->addrs[0].transport, - alist->index == 0 ? "*" : ""); + alist->preferred == 0 ? "*" : ""); for (i = 1; i < alist->nr_addrs; i++) seq_printf(m, " %pISpc%s\n", &alist->addrs[i].transport, - alist->index == i ? "*" : ""); + alist->preferred == i ? "*" : ""); return 0; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 7c4487781637..00504254c1c2 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -18,14 +18,6 @@ #include "internal.h" #include "afs_fs.h" -/* - * Initialise a filesystem server cursor for iterating over FS servers. - */ -static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode) -{ - memset(fc, 0, sizeof(*fc)); -} - /* * Begin an operation on the fileserver. * @@ -35,7 +27,7 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, struct key *key) { - afs_init_fs_cursor(fc, vnode); + memset(fc, 0, sizeof(*fc)); fc->vnode = vnode; fc->key = key; fc->ac.error = SHRT_MAX; @@ -66,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, fc->server_list = afs_get_serverlist(vnode->volume->servers); read_unlock(&vnode->volume->servers_lock); + fc->untried = (1UL << fc->server_list->nr_servers) - 1; + fc->index = READ_ONCE(fc->server_list->preferred); + cbi = vnode->cb_interest; if (cbi) { /* See if the vnode's preferred record is still available */ for (i = 0; i < fc->server_list->nr_servers; i++) { if (fc->server_list->servers[i].cb_interest == cbi) { - fc->start = i; + fc->index = i; goto found_interest; } } @@ -95,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, afs_put_cb_interest(afs_v2net(vnode), cbi); cbi = NULL; - } else { - fc->start = READ_ONCE(fc->server_list->index); } found_interest: - fc->index = fc->start; return true; } @@ -144,11 +136,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) struct afs_addr_list *alist; struct afs_server *server; struct afs_vnode *vnode = fc->vnode; - int error = fc->ac.error; + u32 rtt, abort_code; + int error = fc->ac.error, i; - _enter("%u/%u,%u/%u,%d,%d", - fc->index, fc->start, - fc->ac.index, fc->ac.start, + _enter("%lx[%d],%lx[%d],%d,%d", + fc->untried, fc->index, + fc->ac.tried, fc->ac.index, error, fc->ac.abort_code); if (fc->flags & AFS_FS_CURSOR_STOP) { @@ -345,8 +338,50 @@ start: if (!afs_start_fs_iteration(fc, vnode)) goto failed; -use_server: - _debug("use"); + _debug("__ VOL %llx __", vnode->volume->vid); + error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", fc->untried); + + error = afs_wait_for_fs_probes(fc->server_list, fc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. If we have outstanding + * callbacks, we stick with the server we're already using if we can. + */ + if (fc->cbi) { + _debug("cbi %u", fc->index); + if (test_bit(fc->index, &fc->untried)) + goto selected_server; + afs_put_cb_interest(afs_v2net(vnode), fc->cbi); + fc->cbi = NULL; + _debug("nocbi"); + } + + fc->index = -1; + rtt = U32_MAX; + for (i = 0; i < fc->server_list->nr_servers; i++) { + struct afs_server *s = fc->server_list->servers[i].server; + + if (!test_bit(i, &fc->untried) || !s->probe.responded) + continue; + if (s->probe.rtt < rtt) { + fc->index = i; + rtt = s->probe.rtt; + } + } + + if (fc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", fc->index); + __clear_bit(fc->index, &fc->untried); + /* We're starting on a different fileserver from the list. We need to * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. @@ -379,60 +414,81 @@ use_server: memset(&fc->ac, 0, sizeof(fc->ac)); - /* Probe the current fileserver if we haven't done so yet. */ - if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { - fc->ac.alist = afs_get_addrlist(alist); - - if (!afs_probe_fileserver(fc)) { - switch (fc->ac.error) { - case -ENOMEM: - case -ERESTARTSYS: - case -EINTR: - goto failed; - default: - goto next_server; - } - } - } - if (!fc->ac.alist) fc->ac.alist = alist; else afs_put_addrlist(alist); - fc->ac.start = READ_ONCE(alist->index); - fc->ac.index = fc->ac.start; + fc->ac.index = -1; iterate_address: ASSERT(fc->ac.alist); - _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ if (!afs_iterate_addresses(&fc->ac)) goto next_server; + _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs); + _leave(" = t"); return true; next_server: _debug("next"); afs_end_cursor(&fc->ac); - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - fc->index++; - if (fc->index >= fc->server_list->nr_servers) - fc->index = 0; - if (fc->index != fc->start) - goto use_server; + goto pick_server; +no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (fc->flags & AFS_FS_CURSOR_VBUSY) goto restart_from_beginning; - goto failed; + abort_code = 0; + error = -EDESTADDRREQ; + for (i = 0; i < fc->server_list->nr_servers; i++) { + struct afs_server *s = fc->server_list->servers[i].server; + int probe_error = READ_ONCE(s->probe.error); + + switch (probe_error) { + case 0: + continue; + default: + if (error == -ETIMEDOUT || + error == -ETIME) + continue; + case -ETIMEDOUT: + case -ETIME: + if (error == -ENOMEM || + error == -ENONET) + continue; + case -ENOMEM: + case -ENONET: + if (error == -ENETUNREACH) + continue; + case -ENETUNREACH: + if (error == -EHOSTUNREACH) + continue; + case -EHOSTUNREACH: + if (error == -ECONNREFUSED) + continue; + case -ECONNREFUSED: + if (error == -ECONNRESET) + continue; + case -ECONNRESET: /* Responded, but call expired. */ + if (error == -ECONNABORTED) + continue; + case -ECONNABORTED: + abort_code = s->probe.abort_code; + error = probe_error; + continue; + } + } + + if (error == -ECONNABORTED) + error = afs_abort_to_error(abort_code); failed_set_error: fc->error = error; @@ -480,8 +536,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc) memset(&fc->ac, 0, sizeof(fc->ac)); fc->ac.alist = alist; - fc->ac.start = READ_ONCE(alist->index); - fc->ac.index = fc->ac.start; + fc->ac.index = -1; goto iterate_address; case 0: @@ -538,13 +593,13 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) pr_notice("EDESTADDR occurred\n"); pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", fc->cb_break, fc->cb_break_2, fc->flags, fc->error); - pr_notice("FC: st=%u ix=%u ni=%u\n", - fc->start, fc->index, fc->nr_iterations); + pr_notice("FC: ut=%lx ix=%d ni=%u\n", + fc->untried, fc->index, fc->nr_iterations); if (fc->server_list) { const struct afs_server_list *sl = fc->server_list; - pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n", - sl->nr_servers, sl->index, sl->vnovol_mask); + pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", + sl->nr_servers, sl->preferred, sl->vnovol_mask); for (i = 0; i < sl->nr_servers; i++) { const struct afs_server *s = sl->servers[i].server; pr_notice("FC: server fl=%lx av=%u %pU\n", @@ -552,22 +607,21 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) if (s->addresses) { const struct afs_addr_list *a = rcu_dereference(s->addresses); - pr_notice("FC: - av=%u nr=%u/%u/%u ax=%u\n", + pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", a->version, a->nr_ipv4, a->nr_addrs, a->max_addrs, - a->index); - pr_notice("FC: - pr=%lx yf=%lx\n", - a->probed, a->yfs); + a->preferred); + pr_notice("FC: - pr=%lx R=%lx F=%lx\n", + a->probed, a->responded, a->failed); if (a == fc->ac.alist) pr_notice("FC: - current\n"); } } } - pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n", - fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error, - fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations); - + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error, + fc->ac.responded, fc->ac.nr_iterations); rcu_read_unlock(); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 42e1ea7372e9..59970886690f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -43,7 +43,6 @@ int afs_open_socket(struct afs_net *net) struct sockaddr_rxrpc srx; struct socket *socket; unsigned int min_level; - u16 service_upgrade[2]; int ret; _enter(""); @@ -82,13 +81,12 @@ int afs_open_socket(struct afs_net *net) if (ret < 0) goto error_2; - service_upgrade[0] = CM_SERVICE; - service_upgrade[1] = YFS_CM_SERVICE; - ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE, - (void *)service_upgrade, sizeof(service_upgrade)); - if (ret < 0) - goto error_2; - + /* Ideally, we'd turn on service upgrade here, but we can't because + * OpenAFS is buggy and leaks the userStatus field from packet to + * packet and between FS packets and CB packets - so if we try to do an + * upgrade on an FS packet, OpenAFS will leak that into the CB packet + * it sends back to us. + */ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, afs_rx_discard_new_call); @@ -192,6 +190,7 @@ void afs_put_call(struct afs_call *call) afs_put_server(call->net, call->cm_server); afs_put_cb_interest(call->net, call->cbi); + afs_put_addrlist(call->alist); kfree(call->request); trace_afs_call(call, afs_call_trace_free, 0, o, @@ -205,21 +204,22 @@ void afs_put_call(struct afs_call *call) } /* - * Queue the call for actual work. Returns 0 unconditionally for convenience. + * Queue the call for actual work. */ -int afs_queue_call_work(struct afs_call *call) +static void afs_queue_call_work(struct afs_call *call) { - int u = atomic_inc_return(&call->usage); + if (call->type->work) { + int u = atomic_inc_return(&call->usage); - trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&call->net->nr_outstanding_calls), - __builtin_return_address(0)); + trace_afs_call(call, afs_call_trace_work, u, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); - INIT_WORK(&call->work, call->type->work); + INIT_WORK(&call->work, call->type->work); - if (!queue_work(afs_wq, &call->work)) - afs_put_call(call); - return 0; + if (!queue_work(afs_wq, &call->work)) + afs_put_call(call); + } } /* @@ -376,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, atomic_read(&call->net->nr_outstanding_calls)); call->async = async; + call->addr_ix = ac->index; + call->alist = afs_get_addrlist(ac->alist); /* Work out the length we're going to transmit. This is awkward for * calls such as FS.StoreData where there's an extra injection of data @@ -407,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); + call->error = ret; goto error_kill_call; } @@ -458,6 +461,8 @@ error_do_abort: call->error = ret; trace_afs_call_done(call); error_kill_call: + if (call->type->done) + call->type->done(call); afs_put_call(call); ac->error = ret; _leave(" = %d", ret); @@ -509,6 +514,7 @@ static void afs_deliver_to_call(struct afs_call *call) state = READ_ONCE(call->state); switch (ret) { case 0: + afs_queue_call_work(call); if (state == AFS_CALL_CL_PROC_REPLY) { if (call->cbi) set_bit(AFS_SERVER_FL_MAY_HAVE_CB, @@ -546,6 +552,8 @@ static void afs_deliver_to_call(struct afs_call *call) } done: + if (call->type->done) + call->type->done(call); if (state == AFS_CALL_COMPLETE && call->incoming) afs_put_call(call); out: diff --git a/fs/afs/server.c b/fs/afs/server.c index 7c1be8b4dc9a..642afa2e9783 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -231,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, rwlock_init(&server->fs_lock); INIT_HLIST_HEAD(&server->cb_volumes); rwlock_init(&server->cb_break_lock); + init_waitqueue_head(&server->probe_wq); + spin_lock_init(&server->probe_lock); afs_inc_servers_outstanding(net); _leave(" = %p", server); @@ -254,7 +256,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, ret = -ERESTARTSYS; if (afs_begin_vlserver_operation(&vc, cell, key)) { while (afs_select_vlserver(&vc)) { - if (test_bit(vc.ac.index, &vc.ac.alist->yfs)) + if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) alist = afs_yfsvl_get_endpoints(&vc, uuid); else alist = afs_vl_get_addrs_u(&vc, uuid); @@ -365,8 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, - .start = alist->index, - .index = 0, + .index = alist->preferred, .error = 0, }; _enter("%p", server); @@ -374,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + wait_var_event(&server->probe_outstanding, + atomic_read(&server->probe_outstanding) == 0); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } @@ -506,105 +510,6 @@ void afs_purge_servers(struct afs_net *net) _leave(""); } -/* - * Probe a fileserver to find its capabilities. - * - * TODO: Try service upgrade. - */ -static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc) -{ - int i; - - _enter(""); - - fc->ac.start = READ_ONCE(fc->ac.alist->index); - fc->ac.index = fc->ac.start; - fc->ac.error = 0; - fc->ac.begun = false; - - while (afs_iterate_addresses(&fc->ac)) { - afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server, - &fc->ac, fc->key); - switch (fc->ac.error) { - case 0: - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) { - for (i = 0; i < fc->ac.alist->nr_addrs; i++) - fc->ac.alist->addrs[i].srx_service = - YFS_FS_SERVICE; - } - afs_end_cursor(&fc->ac); - set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags); - return true; - case -ECONNABORTED: - fc->ac.error = afs_abort_to_error(fc->ac.abort_code); - goto error; - case -ENOMEM: - case -ENONET: - goto error; - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - case -ETIMEDOUT: - case -ETIME: - break; - default: - fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail); - goto error; - } - } - -error: - afs_end_cursor(&fc->ac); - return false; -} - -/* - * If we haven't already, try probing the fileserver to get its capabilities. - * We try not to instigate parallel probes, but it's possible that the parallel - * probes will fail due to authentication failure when ours would succeed. - * - * TODO: Try sending an anonymous probe if an authenticated probe fails. - */ -bool afs_probe_fileserver(struct afs_fs_cursor *fc) -{ - bool success; - int ret, retries = 0; - - _enter(""); - -retry: - if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { - _leave(" = t"); - return true; - } - - if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { - success = afs_do_probe_fileserver(fc); - clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); - wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); - _leave(" = t"); - return success; - } - - _debug("wait"); - ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, - TASK_INTERRUPTIBLE); - if (ret == -ERESTARTSYS) { - fc->ac.error = ret; - _leave(" = f [%d]", ret); - return false; - } - - retries++; - if (retries == 4) { - fc->ac.error = -ESTALE; - _leave(" = f [stale]"); - return false; - } - _debug("retry"); - goto retry; -} - /* * Get an update for a server's address list. */ diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 8a5760aa5832..95d0761cdb34 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new, return false; changed: - /* Maintain the same current server as before if possible. */ - cur = old->servers[old->index].server; + /* Maintain the same preferred server as before if possible. */ + cur = old->servers[old->preferred].server; for (j = 0; j < new->nr_servers; j++) { if (new->servers[j].server == cur) { - new->index = j; + new->preferred = j; break; } } diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index c1e316ba105a..b4f1a84519b9 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -23,6 +23,8 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, if (vlserver) { atomic_set(&vlserver->usage, 1); rwlock_init(&vlserver->lock); + init_waitqueue_head(&vlserver->probe_wq); + spin_lock_init(&vlserver->probe_lock); vlserver->name_len = name_len; vlserver->port = port; memcpy(vlserver->name, name, name_len); @@ -141,7 +143,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end, /* Start with IPv6 if available. */ if (alist->nr_ipv4 < alist->nr_addrs) - alist->index = alist->nr_ipv4; + alist->preferred = alist->nr_ipv4; *_b = b; return alist; @@ -307,6 +309,8 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell, (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry)); } + clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + vllist->servers[j].priority = bs.priority; vllist->servers[j].weight = bs.weight; vllist->servers[j].server = server; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c new file mode 100644 index 000000000000..c0f616bd70cb --- /dev/null +++ b/fs/afs/vl_probe.c @@ -0,0 +1,273 @@ +/* AFS vlserver probing + * + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "afs_fs.h" +#include "internal.h" +#include "protocol_yfs.h" + +static bool afs_vl_probe_done(struct afs_vlserver *server) +{ + if (!atomic_dec_and_test(&server->probe_outstanding)) + return false; + + wake_up_var(&server->probe_outstanding); + clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags); + wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING); + return true; +} + +/* + * Process the result of probing a vlserver. This is called after successful + * or failed delivery of an VL.GetCapabilities operation. + */ +void afs_vlserver_probe_result(struct afs_call *call) +{ + struct afs_addr_list *alist = call->alist; + struct afs_vlserver *server = call->reply[0]; + unsigned int server_index = (long)call->reply[1]; + unsigned int index = call->addr_ix; + unsigned int rtt = UINT_MAX; + bool have_result = false; + u64 _rtt; + int ret = call->error; + + _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code); + + spin_lock(&server->probe_lock); + + switch (ret) { + case 0: + server->probe.error = 0; + goto responded; + case -ECONNABORTED: + if (!server->probe.responded) { + server->probe.abort_code = call->abort_code; + server->probe.error = ret; + } + goto responded; + case -ENOMEM: + case -ENONET: + server->probe.local_failure = true; + afs_io_error(call, afs_io_error_vl_probe_fail); + goto out; + case -ECONNRESET: /* Responded, but call expired. */ + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + case -ETIMEDOUT: + case -ETIME: + default: + clear_bit(index, &alist->responded); + set_bit(index, &alist->failed); + if (!server->probe.responded && + (server->probe.error == 0 || + server->probe.error == -ETIMEDOUT || + server->probe.error == -ETIME)) + server->probe.error = ret; + afs_io_error(call, afs_io_error_vl_probe_fail); + goto out; + } + +responded: + set_bit(index, &alist->responded); + clear_bit(index, &alist->failed); + + if (call->service_id == YFS_VL_SERVICE) { + server->probe.is_yfs = true; + set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } else { + server->probe.not_yfs = true; + if (!server->probe.is_yfs) { + clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); + alist->addrs[index].srx_service = call->service_id; + } + } + + /* Get the RTT and scale it to fit into a 32-bit value that represents + * over a minute of time so that we can access it with one instruction + * on a 32-bit system. + */ + _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); + _rtt /= 64; + rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; + if (rtt < server->probe.rtt) { + server->probe.rtt = rtt; + alist->preferred = index; + have_result = true; + } + + smp_wmb(); /* Set rtt before responded. */ + server->probe.responded = true; + set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); +out: + spin_unlock(&server->probe_lock); + + _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", + server_index, index, &alist->addrs[index].transport, + (unsigned int)rtt, ret); + + have_result |= afs_vl_probe_done(server); + if (have_result) { + server->probe.have_result = true; + wake_up_var(&server->probe.have_result); + wake_up_all(&server->probe_wq); + } +} + +/* + * Probe all of a vlserver's addresses to find out the best route and to + * query its capabilities. + */ +static int afs_do_probe_vlserver(struct afs_net *net, + struct afs_vlserver *server, + struct key *key, + unsigned int server_index) +{ + struct afs_addr_cursor ac = { + .index = 0, + }; + int ret; + + _enter("%s", server->name); + + read_lock(&server->lock); + ac.alist = rcu_dereference_protected(server->addresses, + lockdep_is_held(&server->lock)); + read_unlock(&server->lock); + + atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + memset(&server->probe, 0, sizeof(server->probe)); + server->probe.rtt = UINT_MAX; + + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { + ret = afs_vl_get_capabilities(net, &ac, key, server, + server_index, true); + if (ret != -EINPROGRESS) { + afs_vl_probe_done(server); + return ret; + } + } + + return 0; +} + +/* + * Send off probes to all unprobed servers. + */ +int afs_send_vl_probes(struct afs_net *net, struct key *key, + struct afs_vlserver_list *vllist) +{ + struct afs_vlserver *server; + int i, ret; + + for (i = 0; i < vllist->nr_servers; i++) { + server = vllist->servers[i].server; + if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags)) + continue; + + if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) { + ret = afs_do_probe_vlserver(net, server, key, i); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * Wait for the first as-yet untried server to respond. + */ +int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, + unsigned long untried) +{ + struct wait_queue_entry *waits; + struct afs_vlserver *server; + unsigned int rtt = UINT_MAX; + bool have_responders = false; + int pref = -1, i; + + _enter("%u,%lx", vllist->nr_servers, untried); + + /* Only wait for servers that have a probe outstanding. */ + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + __clear_bit(i, &untried); + if (server->probe.responded) + have_responders = true; + } + } + if (have_responders || !untried) + return 0; + + waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL); + if (!waits) + return -ENOMEM; + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + init_waitqueue_entry(&waits[i], current); + add_wait_queue(&server->probe_wq, &waits[i]); + } + } + + for (;;) { + bool still_probing = false; + + set_current_state(TASK_INTERRUPTIBLE); + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (server->probe.responded) + goto stop; + if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) + still_probing = true; + } + } + + if (!still_probing || unlikely(signal_pending(current))) + goto stop; + schedule(); + } + +stop: + set_current_state(TASK_RUNNING); + + for (i = 0; i < vllist->nr_servers; i++) { + if (test_bit(i, &untried)) { + server = vllist->servers[i].server; + if (server->probe.responded && + server->probe.rtt < rtt) { + pref = i; + rtt = server->probe.rtt; + } + + remove_wait_queue(&server->probe_wq, &waits[i]); + } + } + + kfree(waits); + + if (pref == -1 && signal_pending(current)) + return -ERESTARTSYS; + + if (pref >= 0) + vllist->preferred = pref; + + _leave(" = 0 [%u]", pref); + return 0; +} diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index ead6dedbb561..b64a284b99d2 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -58,8 +58,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) if (!vc->server_list || !vc->server_list->nr_servers) return false; - vc->start = READ_ONCE(vc->server_list->index); - vc->index = vc->start; + vc->untried = (1UL << vc->server_list->nr_servers) - 1; + vc->index = -1; return true; } @@ -71,11 +71,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) { struct afs_addr_list *alist; struct afs_vlserver *vlserver; - int error = vc->ac.error; + u32 rtt; + int error = vc->ac.error, abort_code, i; - _enter("%u/%u,%u/%u,%d,%d", - vc->index, vc->start, - vc->ac.index, vc->ac.start, + _enter("%lx[%d],%lx[%d],%d,%d", + vc->untried, vc->index, + vc->ac.tried, vc->ac.index, error, vc->ac.abort_code); if (vc->flags & AFS_VL_CURSOR_STOP) { @@ -145,23 +146,52 @@ restart_from_beginning: start: _debug("start"); - /* TODO: Consider checking the VL server list */ - if (!afs_start_vl_iteration(vc)) goto failed; -use_server: - _debug("use"); + error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); + if (error < 0) + goto failed_set_error; + +pick_server: + _debug("pick [%lx]", vc->untried); + + error = afs_wait_for_vl_probes(vc->server_list, vc->untried); + if (error < 0) + goto failed_set_error; + + /* Pick the untried server with the lowest RTT. */ + vc->index = vc->server_list->preferred; + if (test_bit(vc->index, &vc->untried)) + goto selected_server; + + vc->index = -1; + rtt = U32_MAX; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + + if (!test_bit(i, &vc->untried) || !s->probe.responded) + continue; + if (s->probe.rtt < rtt) { + vc->index = i; + rtt = s->probe.rtt; + } + } + + if (vc->index == -1) + goto no_more_servers; + +selected_server: + _debug("use %d", vc->index); + __clear_bit(vc->index, &vc->untried); + /* We're starting on a different vlserver from the list. We need to * check it, find its address list and probe its capabilities before we * use it. */ ASSERTCMP(vc->ac.alist, ==, NULL); vlserver = vc->server_list->servers[vc->index].server; - - // TODO: Check the vlserver occasionally - //if (!afs_check_vlserver_record(vc, vlserver)) - // goto failed; + vc->server = vlserver; _debug("USING VLSERVER: %s", vlserver->name); @@ -173,62 +203,84 @@ use_server: memset(&vc->ac, 0, sizeof(vc->ac)); - /* Probe the current vlserver if we haven't done so yet. */ -#if 0 // TODO - if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) { - vc->ac.alist = afs_get_addrlist(alist); - - if (!afs_probe_vlserver(vc)) { - error = vc->ac.error; - switch (error) { - case -ENOMEM: - case -ERESTARTSYS: - case -EINTR: - goto failed_set_error; - default: - goto next_server; - } - } - } -#endif - if (!vc->ac.alist) vc->ac.alist = alist; else afs_put_addrlist(alist); - vc->ac.start = READ_ONCE(alist->index); - vc->ac.index = vc->ac.start; + vc->ac.index = -1; iterate_address: ASSERT(vc->ac.alist); - _debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ if (!afs_iterate_addresses(&vc->ac)) goto next_server; + _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); + _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); return true; next_server: _debug("next"); afs_end_cursor(&vc->ac); - vc->index++; - if (vc->index >= vc->server_list->nr_servers) - vc->index = 0; - if (vc->index != vc->start) - goto use_server; + goto pick_server; +no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ if (vc->flags & AFS_VL_CURSOR_RETRY) goto restart_from_beginning; - goto failed; + abort_code = 0; + error = -EDESTADDRREQ; + for (i = 0; i < vc->server_list->nr_servers; i++) { + struct afs_vlserver *s = vc->server_list->servers[i].server; + int probe_error = READ_ONCE(s->probe.error); + switch (probe_error) { + case 0: + continue; + default: + if (error == -ETIMEDOUT || + error == -ETIME) + continue; + case -ETIMEDOUT: + case -ETIME: + if (error == -ENOMEM || + error == -ENONET) + continue; + case -ENOMEM: + case -ENONET: + if (error == -ENETUNREACH) + continue; + case -ENETUNREACH: + if (error == -EHOSTUNREACH) + continue; + case -EHOSTUNREACH: + if (error == -ECONNREFUSED) + continue; + case -ECONNREFUSED: + if (error == -ECONNRESET) + continue; + case -ECONNRESET: /* Responded, but call expired. */ + if (error == -ECONNABORTED) + continue; + case -ECONNABORTED: + abort_code = s->probe.abort_code; + error = probe_error; + continue; + } + } + + if (error == -ECONNABORTED) + error = afs_abort_to_error(abort_code); + +failed_set_error: + vc->error = error; failed: vc->flags |= AFS_VL_CURSOR_STOP; afs_end_cursor(&vc->ac); @@ -250,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); - pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n", - vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error); + pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", + vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); if (vc->server_list) { const struct afs_vlserver_list *sl = vc->server_list; @@ -259,26 +311,25 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) sl->nr_servers, sl->index); for (i = 0; i < sl->nr_servers; i++) { const struct afs_vlserver *s = sl->servers[i].server; - pr_notice("VC: server fl=%lx %s+%hu\n", - s->flags, s->name, s->port); + pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", + s->name, s->port, s->flags, s->probe.error); if (s->addresses) { const struct afs_addr_list *a = rcu_dereference(s->addresses); - pr_notice("VC: - av=%u nr=%u/%u/%u ax=%u\n", - a->version, + pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, - a->index); - pr_notice("VC: - pr=%lx yf=%lx\n", - a->probed, a->yfs); + a->preferred); + pr_notice("VC: - pr=%lx R=%lx F=%lx\n", + a->probed, a->responded, a->failed); if (a == vc->ac.alist) pr_notice("VC: - current\n"); } } } - pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n", - vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error, - vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations); + pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", + vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, + vc->ac.responded, vc->ac.nr_iterations); rcu_read_unlock(); } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 3127ab9b5521..c3d9e5a5f67e 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -348,12 +348,18 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) break; } - call->reply[0] = (void *)(unsigned long)call->service_id; - _leave(" = 0 [done]"); return 0; } +static void afs_destroy_vl_get_capabilities(struct afs_call *call) +{ + struct afs_vlserver *server = call->reply[0]; + + afs_put_vlserver(call->net, server); + afs_flat_call_destructor(call); +} + /* * VL.GetCapabilities operation type */ @@ -361,7 +367,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { .name = "VL.GetCapabilities", .op = afs_VL_GetCapabilities, .deliver = afs_deliver_vl_get_capabilities, - .destructor = afs_flat_call_destructor, + .done = afs_vlserver_probe_result, + .destructor = afs_destroy_vl_get_capabilities, }; /* @@ -371,8 +378,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = { * We use this to probe for service upgrade to determine what the server at the * other end supports. */ -int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, - struct key *key) +int afs_vl_get_capabilities(struct afs_net *net, + struct afs_addr_cursor *ac, + struct key *key, + struct afs_vlserver *server, + unsigned int server_index, + bool async) { struct afs_call *call; __be32 *bp; @@ -384,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, return -ENOMEM; call->key = key; - call->upgrade = true; /* Let's see if this is a YFS server */ - call->reply[0] = (void *)VLGETCAPABILITIES; - call->ret_reply0 = true; + call->reply[0] = afs_get_vlserver(server); + call->reply[1] = (void *)(long)server_index; + call->upgrade = true; + call->want_reply_time = true; /* marshall the parameters */ bp = call->request; @@ -394,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac, /* Can't take a ref on server */ trace_afs_make_vl_call(call); - return afs_make_call(ac, call, GFP_KERNEL, false); + return afs_make_call(ac, call, GFP_KERNEL, async); } /* @@ -591,11 +603,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) } alist = call->reply[0]; - - /* Start with IPv6 if available. */ - if (alist->nr_ipv4 < alist->nr_addrs) - alist->index = alist->nr_ipv4; - _leave(" = 0 [done]"); return 0; } diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 7527c081726e..00975ed3640f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -82,22 +82,6 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, return ERR_PTR(-ERESTARTSYS); while (afs_select_vlserver(&vc)) { - if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) { - ret = afs_vl_get_capabilities(cell->net, &vc.ac, key); - switch (ret) { - case VL_SERVICE: - clear_bit(vc.ac.index, &vc.ac.alist->yfs); - set_bit(vc.ac.index, &vc.ac.alist->probed); - vc.ac.alist->addrs[vc.ac.index].srx_service = ret; - break; - case YFS_VL_SERVICE: - set_bit(vc.ac.index, &vc.ac.alist->yfs); - set_bit(vc.ac.index, &vc.ac.alist->probed); - vc.ac.alist->addrs[vc.ac.index].srx_service = ret; - break; - } - } - vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); } diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index ed155042236b..33d291888ba9 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -137,6 +137,7 @@ enum afs_io_error { afs_io_error_extract, afs_io_error_fs_probe_fail, afs_io_error_vl_lookup_fail, + afs_io_error_vl_probe_fail, }; enum afs_file_error { @@ -261,7 +262,8 @@ enum afs_file_error { EM(afs_io_error_cm_reply, "CM_REPLY") \ EM(afs_io_error_extract, "EXTRACT") \ EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \ - E_(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") + EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \ + E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL") #define afs_file_errors \ EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ From 0e9b4a82710220c04100892fb7277b78fd33a747 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Nov 2018 18:19:03 -0400 Subject: [PATCH 26/26] missing bits of "iov_iter: Separate type from direction and use accessor functions" sunrpc patches from nfs tree conflict with calling conventions change done in iov_iter work. Trivial fixup... Signed-off-by: Al Viro --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1b51e04d3566..ae77c71c1f64 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -361,7 +361,7 @@ static ssize_t xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, struct kvec *kvec, size_t count, size_t seek) { - iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count); + iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count); return xs_sock_recvmsg(sock, msg, flags, seek); } @@ -370,7 +370,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, struct bio_vec *bvec, unsigned long nr, size_t count, size_t seek) { - iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count); + iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count); return xs_sock_recvmsg(sock, msg, flags, seek); }