From 3ed5e2a2c394df4e03a680842c2d07a8680f133b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Mar 2013 17:29:33 -0500 Subject: [PATCH 01/67] SUNRPC: Report network/connection errors correctly for SOFTCONN rpc tasks In the case of a SOFTCONN rpc task, we really want to ensure that it reports errors like ENETUNREACH back to the caller. Currently, only some of these errors are being reported back (connect errors are not), and they are being converted by the RPC layer into EIO. Reported-by: Jan Engelhardt Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 24 ++++++++++++++---------- net/sunrpc/xprtsock.c | 8 ++++---- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dcc446e7fbf6..b95a0a2d5eea 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1644,22 +1644,26 @@ call_connect_status(struct rpc_task *task) dprint_status(task); - task->tk_status = 0; - if (status >= 0 || status == -EAGAIN) { - clnt->cl_stats->netreconn++; - task->tk_action = call_transmit; - return; - } - trace_rpc_connect_status(task, status); switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - break; - default: - rpc_exit(task, -EIO); + return; + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) + break; + /* retry with existing socket, after a delay */ + case 0: + case -EAGAIN: + task->tk_status = 0; + clnt->cl_stats->netreconn++; + task->tk_action = call_transmit; + return; } + rpc_exit(task, status); } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c1d8476b7692..3081620cb02c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2202,10 +2202,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) */ xs_tcp_force_close(xprt); break; - case -ECONNREFUSED: - case -ECONNRESET: - case -ENETUNREACH: - /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: @@ -2216,6 +2212,10 @@ static void xs_tcp_setup_socket(struct work_struct *work) /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. */ + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ goto out; } out_eagain: From 5d422301f97b821301efcdb6fc9d1a83a5c102d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Mar 2013 16:57:48 -0400 Subject: [PATCH 02/67] NFSv4: Fail I/O if the state recovery fails irrevocably If state recovery fails with an ESTALE or a ENOENT, then we shouldn't keep retrying. Instead, mark the stateid as being invalid and fail the I/O with an EIO error. For other operations such as POSIX and BSD file locking, truncate etc, fail with an EBADF to indicate that this file descriptor is no longer valid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 8 +++++++- fs/nfs/nfs4filelayout.c | 12 +++++++++--- fs/nfs/nfs4proc.c | 37 +++++++++++++++++++++++++++++-------- fs/nfs/nfs4state.c | 19 ++++++++++++++----- fs/nfs/pnfs.c | 2 ++ 5 files changed, 61 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 944c9a5c1039..9ce90135bf22 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -149,6 +149,7 @@ enum { NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ + NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ }; struct nfs4_state { @@ -347,7 +348,7 @@ extern int nfs4_wait_clnt_recover(struct nfs_client *clp); extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); -extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); +extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); @@ -412,6 +413,11 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei return memcmp(dst, src, sizeof(*dst)) == 0; } +static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) +{ + return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4fb234d3aefb..1ee5737211d7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -158,11 +158,14 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(mds_server, state); + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(mds_server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + } nfs4_schedule_lease_recovery(mds_client); goto wait_on_recovery; /* DS session errors */ @@ -226,6 +229,9 @@ reset: out: task->tk_status = 0; return -EAGAIN; +out_bad_stateid: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26431cf62ddb..c3bbb6c53d61 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -295,7 +295,9 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc } if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: @@ -303,11 +305,16 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc if (state == NULL) break; nfs_remove_bad_delegation(state->inode); - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); @@ -2053,7 +2060,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (state != NULL) { + if (state != NULL && nfs4_valid_open_stateid(state)) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, @@ -2201,6 +2208,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode &= ~FMODE_WRITE; } } + if (!nfs4_valid_open_stateid(state)) + call_close = 0; spin_unlock(&state->owner->so_lock); if (!call_close) { @@ -3980,11 +3989,14 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto stateid_invalid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto stateid_invalid; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); @@ -4016,6 +4028,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } task->tk_status = nfs4_map_errors(task->tk_status); return 0; +stateid_invalid: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) @@ -4632,12 +4647,18 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->res.open_seqid = data->arg.open_seqid; } else data->arg.new_lock_owner = 0; + if (!nfs4_valid_open_stateid(state)) { + data->rpc_status = -EBADF; + task->tk_action = NULL; + goto out_release_open_seqid; + } data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, &data->res.seq_res, task) == 0) return; +out_release_open_seqid: nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..fec1c5bb4863 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -699,6 +699,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) list_for_each_entry(state, &nfsi->open_states, inode_states) { if (state->owner != owner) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (atomic_inc_not_zero(&state->count)) return state; } @@ -1286,14 +1288,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s return 1; } -void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) +int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = server->nfs_client; + if (!nfs4_valid_open_stateid(state)) + return -EBADF; nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); + return 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1323,6 +1328,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) +{ + set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1398,6 +1408,8 @@ restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (state->state == 0) continue; atomic_inc(&state->count); @@ -1430,10 +1442,7 @@ restart: * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. */ - memset(&state->stateid, 0, - sizeof(state->stateid)); - /* Mark the file as being 'closed' */ - state->state = 0; + nfs4_state_mark_recovery_failed(state, status); break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4bdffe0ba025..c5bd758e5637 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -718,6 +718,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, spin_lock(&lo->plh_inode->i_lock); if (pnfs_layoutgets_blocked(lo, 1)) { status = -EAGAIN; + } else if (!nfs4_valid_open_stateid(open_state)) { + status = -EBADF; } else if (list_empty(&lo->plh_segs)) { int seq; From c58c844187df61ef7cc103d0abb5dd6198bcfcd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Mar 2013 19:45:14 -0400 Subject: [PATCH 03/67] NFS: Don't accept more reads/writes if the open context recovery failed If the state recovery failed, we want to ensure that the application doesn't try to use the same file descriptor for more reads or writes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 8 ++++++++ fs/nfs/nfs4state.c | 16 ++++++++++++++++ fs/nfs/pagelist.c | 2 ++ fs/nfs/read.c | 2 ++ fs/nfs/write.c | 2 ++ include/linux/nfs_fs.h | 1 + 6 files changed, 31 insertions(+) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 1ee5737211d7..4ba32e23eddd 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -305,6 +305,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(rdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_read(rdata); @@ -407,6 +411,10 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(wdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_write(wdata); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fec1c5bb4863..8db102c7add6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1328,9 +1328,25 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + set_bit(NFS_CONTEXT_BAD, &ctx->flags); + } + spin_unlock(&inode->i_lock); +} + static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) { set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); + nfs4_state_mark_open_context_bad(state); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e56e846e9d2d..7f0933086b36 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -104,6 +104,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *req; struct nfs_lock_context *l_ctx; + if (test_bit(NFS_CONTEXT_BAD, &ctx->flags)) + return ERR_PTR(-EBADF); /* try to allocate the request struct */ req = nfs_page_alloc(); if (req == NULL) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5e5d9899d56..70a26c651f09 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -514,6 +514,8 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } static const struct rpc_call_ops nfs_read_common_ops = { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c483cc50b82e..a2c7c28049d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1251,6 +1251,8 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + rpc_exit(task, -EIO); } void nfs_commit_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1cc25682b20b..f6b1956f3c86 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,7 @@ struct nfs_open_context { unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) +#define NFS_CONTEXT_BAD (2) int error; struct list_head list; From 8c86899f62738b8a22ca3a5f060e269b92e5545a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 18:11:31 -0400 Subject: [PATCH 04/67] NFS: __nfs_find_lock_context needs to check ctx->lock_context for a match too Currently, we're forcing an unnecessary duplication of the initial nfs_lock_context in calls to nfs_get_lock_context, since __nfs_find_lock_context ignores the ctx->lock_context. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1f941674b089..55b840f05ab2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -565,16 +565,17 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *pos; + struct nfs_lock_context *head = &ctx->lock_context; + struct nfs_lock_context *pos = head; - list_for_each_entry(pos, &ctx->lock_context.list, list) { + do { if (pos->lockowner.l_owner != current->files) continue; if (pos->lockowner.l_pid != current->tgid) continue; atomic_inc(&pos->count); return pos; - } + } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; } From 9b20614988199fb03580b335a28250922e902098 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:52:00 -0400 Subject: [PATCH 05/67] NFSv4: The stateid must remain the same for replayed RPC calls If we replay a READ or WRITE call, we should not be changing the stateid. Currently, we may end up doing so, because the stateid is only selected at xdr encode time. This patch ensures that we select the stateid after we get an NFSv4.1 session slot, and that we keep that same stateid across retries. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4filelayout.c | 14 ++++++++++---- fs/nfs/nfs4proc.c | 27 +++++++++++++++++++++++---- fs/nfs/nfs4xdr.c | 28 ++-------------------------- include/linux/nfs_xdr.h | 2 ++ 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9ce90135bf22..8309e98c44f9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,6 +234,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern void nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode); #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4ba32e23eddd..22d10623f5ee 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -317,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - nfs41_setup_sequence(rdata->ds_clp->cl_session, + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -421,10 +424,13 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - nfs41_setup_sequence(wdata->ds_clp->cl_session, + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE); } static void filelayout_write_call_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c3bbb6c53d61..26176ce3d96a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3454,6 +3454,19 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; + nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); +} +EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); + void __nfs4_read_done_cb(struct nfs_read_data *data) { nfs_invalidate_atime(data->header->inode); @@ -3496,10 +3509,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_READ); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3560,10 +3576,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_WRITE); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3edda554ac7..9d328777b4c1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1506,35 +1506,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) -{ - nfs4_stateid stateid; - - if (ctx->state != NULL) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, lockowner); - if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); - } else - encode_nfs4_stateid(xdr, &zero_stateid); -} - static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1670,8 +1647,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg __be32 *p; encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4b993d358dad..90a4aa190b43 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -486,6 +486,7 @@ struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; unsigned int pgbase; @@ -507,6 +508,7 @@ struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; enum nfs3_stable_how stable; From 5521abfdcf4d67c3441d4414f29e1acd7cc43380 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Mar 2013 20:54:34 -0400 Subject: [PATCH 06/67] NFSv4: Resend the READ/WRITE RPC call if a stateid change causes an error Adds logic to ensure that if the server returns a BAD_STATEID, or other state related error, then we check if the stateid has already changed. If it has, then rather than start state recovery, we should just resend the failed RPC call with the new stateid. Allow nfs4_select_rw_stateid to notify that the stateid is unstable by having it return -EWOULDBLOCK if an RPC is underway that might change the stateid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 +-- fs/nfs/nfs4proc.c | 64 +++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4state.c | 32 ++++++++++++++++------- 3 files changed, 86 insertions(+), 14 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8309e98c44f9..627a74f0e248 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,7 +234,7 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; -extern void nfs4_set_rw_stateid(nfs4_stateid *stateid, +extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, fmode_t fmode); @@ -358,7 +358,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, +extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, fmode_t, const struct nfs_lockowner *); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26176ce3d96a..6ad06121d88c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3454,7 +3454,7 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -void nfs4_set_rw_stateid(nfs4_stateid *stateid, +int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, fmode_t fmode) @@ -3463,10 +3463,37 @@ void nfs4_set_rw_stateid(nfs4_stateid *stateid, if (l_ctx != NULL) lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); + return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); } EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); +static bool nfs4_stateid_is_current(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + nfs4_stateid current_stateid; + + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) + return false; + return nfs4_stateid_match(stateid, ¤t_stateid); +} + +static bool nfs4_error_stateid_expired(int err) +{ + switch (err) { + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_OPENMODE: + case -NFS4ERR_EXPIRED: + return true; + } + return false; +} + void __nfs4_read_done_cb(struct nfs_read_data *data) { nfs_invalidate_atime(data->header->inode); @@ -3487,6 +3514,20 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return 0; } +static bool nfs4_read_stateid_changed(struct rpc_task *task, + struct nfs_readargs *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_READ)) + return false; + rpc_restart_call_prepare(task); + return true; +} + static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { @@ -3494,7 +3535,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - + if (nfs4_read_stateid_changed(task, &data->args)) + return -EAGAIN; return data->read_done_cb ? data->read_done_cb(task, data) : nfs4_read_done_cb(task, data); } @@ -3533,10 +3575,26 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data return 0; } +static bool nfs4_write_stateid_changed(struct rpc_task *task, + struct nfs_writeargs *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_WRITE)) + return false; + rpc_restart_call_prepare(task); + return true; +} + static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; + if (nfs4_write_stateid_changed(task, &data->args)) + return -EAGAIN; return data->write_done_cb ? data->write_done_cb(task, data) : nfs4_write_done_cb(task, data); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8db102c7add6..4e95bd72f480 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -989,13 +989,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, +static int nfs4_copy_lock_stateid(nfs4_stateid *dst, + struct nfs4_state *state, const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; pid_t fl_pid; - bool ret = false; + int ret = -ENOENT; if (lockowner == NULL) @@ -1010,7 +1011,10 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); - ret = true; + ret = 0; + smp_rmb(); + if (!list_empty(&lsp->ls_seqid.list)) + ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -1018,28 +1022,38 @@ out: return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + int ret; int seq; do { seq = read_seqbegin(&state->seqlock); nfs4_stateid_copy(dst, &state->stateid); + ret = 0; + smp_rmb(); + if (!list_empty(&state->owner->so_seqid.list)) + ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, +int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fmode_t fmode, const struct nfs_lockowner *lockowner) { + int ret = 0; if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) - return; - if (nfs4_copy_lock_stateid(dst, state, lockowner)) - return; - nfs4_copy_open_stateid(dst, state); + goto out; + ret = nfs4_copy_lock_stateid(dst, state, lockowner); + if (ret != -ENOENT) + goto out; + ret = nfs4_copy_open_stateid(dst, state); +out: + return ret; } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) From 39c6daae70fe7692fc2d28738c7ebd63f0318741 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 16:11:57 -0400 Subject: [PATCH 07/67] NFSv4: Prepare for minorversion-specific nfs_server capabilities Clean up the setting of the nfs_server->caps, by shoving it all into nfs4_server_common_setup(). Then add an 'initial capabilities' field into struct nfs4_minor_version_ops. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4client.c | 24 +++++++++++++----------- fs/nfs/nfs4proc.c | 8 ++++++++ 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 627a74f0e248..7ef19ce6d832 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -36,6 +36,7 @@ enum nfs4_client_state { struct nfs4_minor_version_ops { u32 minor_version; + unsigned init_caps; int (*call_sync)(struct rpc_clnt *clnt, struct nfs_server *server, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ac4fc9a8fdbc..17b34b2da2df 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -717,6 +717,19 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) goto out; + /* Set the basic capabilities */ + server->caps |= server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh); if (error < 0) @@ -760,9 +773,6 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; server->options = data->options; /* Get a client record */ @@ -779,13 +789,6 @@ static int nfs4_init_server(struct nfs_server *server, if (error < 0) goto error; - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); if (data->wsize) @@ -863,7 +866,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. * Note: NFSv4 always uses TCP, */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ad06121d88c..22c80f9ed824 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6823,6 +6823,10 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK, .call_sync = _nfs4_call_sync, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, @@ -6834,6 +6838,10 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { #if defined(CONFIG_NFS_V4_1) static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, From 3b66486c4c7136f8d4bbe1306d581fadc6bce4c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:31:15 -0400 Subject: [PATCH 08/67] NFSv4.1: Select the "most recent locking state" for read/write/setattr stateids Follow the practice described in section 8.2.2 of RFC5661: When sending a read/write or setattr stateid, set the seqid field to zero in order to signal that the NFS server should apply the most recent locking state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22c80f9ed824..625a729ebcca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6841,7 +6841,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR - | NFS_CAP_POSIX_LOCK, + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e95bd72f480..685b1e953ed8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1053,6 +1053,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, goto out; ret = nfs4_copy_open_stateid(dst, state); out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; return ret; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6c6ed153a9b4..74c9e52c9338 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -197,5 +197,6 @@ struct nfs_server { #define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) +#define NFS_CAP_STATEID_NFSV41 (1U << 16) #endif From 4a1c089345a0f00ad0229bf895bf0e94d101f059 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 14:57:33 -0400 Subject: [PATCH 09/67] NFSv4: Clean up nfs4_opendata_alloc in preparation for NFSv4.1 open modes Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 625a729ebcca..7bbb06f280fc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -782,6 +782,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, const struct iattr *attrs, + enum open_claim_type4 claim, gfp_t gfp_mask) { struct dentry *parent = dget_parent(dentry); @@ -800,7 +801,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); - p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS @@ -818,7 +818,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; + p->o_arg.claim = claim; + switch (claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + p->o_arg.fh = NFS_FH(dir); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + p->o_arg.fh = NFS_FH(dentry->d_inode); + } if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1200,11 +1212,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * return ERR_PTR(-ENOENT); } -static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state) +static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, + struct nfs4_state *state, enum open_claim_type4 claim) { struct nfs4_opendata *opendata; - opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS); + opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -1290,11 +1304,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state fmode_t delegation_type = 0; int status; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_PREVIOUS); if (IS_ERR(opendata)) return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; - opendata->o_arg.fh = NFS_FH(state->inode); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) @@ -1338,10 +1351,10 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_DELEGATE_CUR); if (IS_ERR(opendata)) return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); ret = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); @@ -1727,7 +1740,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_NULL); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1927,7 +1941,9 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, + NFS4_OPEN_CLAIM_NULL, + GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; From d9fc6619ca98943d56d2931ce9e4caa4039fd0f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 15:39:06 -0400 Subject: [PATCH 10/67] NFSv4.1: Add xdr support for CLAIM_FH and CLAIM_DELEG_CUR_FH opens Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9d328777b4c1..650fa322b903 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1459,6 +1459,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc encode_string(xdr, name->len, name->name); } +static inline void encode_claim_fh(struct xdr_stream *xdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_FH); +} + +static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH); + encode_nfs4_stateid(xdr, stateid); +} + static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); @@ -1474,6 +1491,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, case NFS4_OPEN_CLAIM_DELEGATE_CUR: encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); break; + case NFS4_OPEN_CLAIM_FH: + encode_claim_fh(xdr); + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + encode_claim_delegate_cur_fh(xdr, &arg->u.delegation); + break; default: BUG(); } From 49f9a0fafd844c32f2abada047c0b9a5ba0d6255 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 16:44:28 -0400 Subject: [PATCH 11/67] NFSv4.1: Enable open-by-filehandle Sometimes, we actually _want_ to do open-by-filehandle, for instance when recovering opens after a network partition, or when called from nfs4_file_open. Enable that functionality using a new capability NFS_CAP_ATOMIC_OPEN_V1, and which is only enabled for NFSv4.1 servers that support it. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ fs/nfs/nfs4proc.c | 53 +++++++++++++++++++++++++++++++++------ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f23f455be42b..e093e73178b7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; if (d_mountpoint(dentry)) goto no_open; + if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) + goto no_open; inode = dentry->d_inode; parent = dget_parent(dentry); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7bbb06f280fc..732b76f703d6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -767,6 +767,35 @@ struct nfs4_opendata { int cancelled; }; +static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + if (err != -EINVAL) + return false; + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + return false; + server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1; + exception->retry = 1; + return true; +} + +static enum open_claim_type4 +nfs4_map_atomic_open_claim(struct nfs_server *server, + enum open_claim_type4 claim) +{ + if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) + return claim; + switch (claim) { + default: + return claim; + case NFS4_OPEN_CLAIM_FH: + return NFS4_OPEN_CLAIM_NULL; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + return NFS4_OPEN_CLAIM_DELEGATE_CUR; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + return NFS4_OPEN_CLAIM_DELEGATE_PREV; + } +} static void nfs4_init_opendata_res(struct nfs4_opendata *p) { @@ -818,8 +847,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = claim; - switch (claim) { + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEGATE_PREV: @@ -1326,6 +1355,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -1741,7 +1772,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s int ret; opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_NULL); + NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1759,6 +1790,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; switch (err) { default: goto out; @@ -1926,6 +1959,7 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; + enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; int status; /* Protect against reboot recovery conflicts */ @@ -1941,9 +1975,10 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; + if (dentry->d_inode) + claim = NFS4_OPEN_CLAIM_FH; opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - NFS4_OPEN_CLAIM_NULL, - GFP_KERNEL); + claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2001,6 +2036,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct rpc_cred *cred, struct nfs4_threshold **ctx_th) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; int status; @@ -2044,7 +2080,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + if (nfs4_clear_cap_atomic_open_v1(server, status, &exception)) + continue; + res = ERR_PTR(nfs4_handle_exception(server, status, &exception)); } while (exception.retry); return res; @@ -6858,7 +6896,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK - | NFS_CAP_STATEID_NFSV41, + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 74c9e52c9338..d8fdfdc7a8fe 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -198,5 +198,6 @@ struct nfs_server { #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) +#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #endif From ccb46e2063fe0816c3f28a8d328a9bbac7e88648 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Mar 2013 12:50:59 -0400 Subject: [PATCH 12/67] NFSv4.1: Use CLAIM_DELEG_CUR_FH opens when available Now that we do CLAIM_FH opens, we may run into situations where we get a delegation but don't have perfect knowledge of the file path. When returning the delegation, we might therefore not be able to us CLAIM_DELEGATE_CUR opens to convert the delegation into OPEN stateids and locks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 732b76f703d6..06e5a72b5fa2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1383,7 +1383,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs int ret; opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_DELEGATE_CUR); + NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); From 6e3cf2415269894895074f139ef2e95ef14ddd5d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Mar 2013 15:22:45 -0400 Subject: [PATCH 13/67] NFSv4: Add a mapping for NFS4ERR_FILE_OPEN in nfs4_map_errors With unlink is an asynchronous operation in the sillyrename case, it expects nfs4_async_handle_error() to map the error correctly. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 06e5a72b5fa2..3e7d42fb775c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -107,6 +107,8 @@ static int nfs4_map_errors(int err) return -EPROTONOSUPPORT; case -NFS4ERR_ACCESS: return -EACCES; + case -NFS4ERR_FILE_OPEN: + return -EBUSY; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); From 91876b13b8b3cbff5cca8476d4b4eebe5f6038cc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Mar 2013 14:01:33 -0400 Subject: [PATCH 14/67] NFSv4: Fix another reboot recovery race If the open_context for the file is not yet fully initialised, then open recovery cannot succeed, and since nfs4_state_find_open_context returns an ENOENT, we end up treating the file as being irrecoverable. What we really want to do, is just defer the recovery until later. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++----- fs/nfs/nfs4state.c | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e7d42fb775c..81343944e096 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1373,7 +1373,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; @@ -1814,7 +1814,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; @@ -1936,10 +1936,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { + if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) nfs4_schedule_stateid_recovery(server, state); - nfs4_wait_clnt_recover(server->nfs_client); - } *res = state; out: return ret; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 685b1e953ed8..b924bdd69494 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1476,6 +1476,8 @@ restart: */ nfs4_state_mark_recovery_failed(state, status); break; + case -EAGAIN: + ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: From 809b426c7fc46b07065eb382c0289710c34b37e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Mar 2013 11:54:45 -0400 Subject: [PATCH 15/67] NFSv4: Fix Oopses in the fs_locations code If the server sends us a pathname with more components than the client limit of NFS4_PATHNAME_MAXCOMPONENTS, more server entries than the client limit of NFS4_FS_LOCATION_MAXSERVERS, or sends a total number of fs_locations entries than the client limit of NFS4_FS_LOCATIONS_MAXENTRIES then we will currently Oops because the limit checks are done _after_ we've decoded the data into the arrays. Reported-by: fanchaoting Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 650fa322b903..0b744895b9e1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3496,8 +3496,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) if (n == 0) goto root_path; dprintk("pathname4: "); - path->ncomponents = 0; - while (path->ncomponents < n) { + if (n > NFS4_PATHNAME_MAXCOMPONENTS) { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) @@ -3506,12 +3509,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) pr_cont("%s%.*s ", (path->ncomponents != n ? "/ " : ""), component->len, component->data); - if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) - path->ncomponents++; - else { - dprintk("cannot parse %d components in path\n", n); - goto out_eio; - } } out: return status; @@ -3556,27 +3553,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st n = be32_to_cpup(p); if (n <= 0) goto out_eio; - res->nlocations = 0; - while (res->nlocations < n) { + for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; - struct nfs4_fs_location *loc = &res->locations[res->nlocations]; + struct nfs4_fs_location *loc; + if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES) + break; + loc = &res->locations[res->nlocations]; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; m = be32_to_cpup(p); - loc->nservers = 0; dprintk("%s: servers:\n", __func__); - while (loc->nservers < m) { - struct nfs4_string *server = &loc->servers[loc->nservers]; - status = decode_opaque_inline(xdr, &server->len, &server->data); - if (unlikely(status != 0)) - goto out_eio; - dprintk("%s ", server->data); - if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) - loc->nservers++; - else { + for (loc->nservers = 0; loc->nservers < m; loc->nservers++) { + struct nfs4_string *server; + + if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) { unsigned int i; dprintk("%s: using first %u of %u servers " "returned for location %u\n", @@ -3590,13 +3583,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(status != 0)) goto out_eio; } + break; } + server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, &server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); } status = decode_pathname(xdr, &loc->rootpath); if (unlikely(status != 0)) goto out_eio; - if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) - res->nlocations++; } if (res->nlocations != 0) status = NFS_ATTR_FATTR_V4_LOCATIONS; From 71afa85e798ebc81054164c947b189d163c92b30 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:16 -0400 Subject: [PATCH 16/67] SUNRPC: Missing module alias for auth_rpcgss.ko Commit f344f6df "SUNRPC: Auto-load RPC authentication kernel modules", Mon Mar 20 13:44:08 2006, adds a request_module() call in rpcauth_create() to auto-load RPC security modules when a ULP tries to create a credential of that flavor. In rpcauth_create(), the name of the module to load is built like this: request_module("rpc-auth-%u", flavor); This means that for, say, RPC_AUTH_GSS, request_module() is looking for a module or alias called "rpc-auth-6". The GSS module is named "auth_rpcgss", and commit f344f6df does not add any new module aliases. There is also no such alias provided in /etc/modprobe.d on my system (Fedora 16). Without this alias, the GSS module is not loaded on demand. This is used by rpcauth_create(). The pseudoflavor_to_flavor() call can return RPC_AUTH_GSS, which is passed to request_module(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5257d2982ba5..282dfb14db05 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1733,6 +1733,7 @@ static void __exit exit_rpcsec_gss(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ } +MODULE_ALIAS("rpc-auth-6"); MODULE_LICENSE("GPL"); module_param_named(expired_cred_retry_delay, gss_expired_cred_retry_delay, From 72f4dc117b57e05120aaac6e218b8abc09a5c350 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:25 -0400 Subject: [PATCH 17/67] NFS: Remove unneeded forward declaration I've built with NFSv4 enabled and disabled. This forward declaration does not seem to be required. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 90a4aa190b43..c1ca1f3f4935 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -14,9 +14,6 @@ #define NFS_DEF_FILE_IO_SIZE (4096U) #define NFS_MIN_FILE_IO_SIZE (1024U) -/* Forward declaration for NFS v3 */ -struct nfs4_secinfo_flavors; - struct nfs4_string { unsigned int len; char *data; From fb15b26f8ba3ff629a052faf3f4a4744585ca2dc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:34 -0400 Subject: [PATCH 18/67] SUNRPC: Define rpcsec_gss_info structure The NFSv4 SECINFO procedure returns a list of security flavors. Any GSS flavor also has a GSS tuple containing an OID, a quality-of- protection value, and a service value, which specifies a particular GSS pseudoflavor. For simplicity and efficiency, I'd like to return each GSS tuple from the NFSv4 SECINFO XDR decoder and pass it straight into the RPC client. Define a data structure that is visible to both the NFS client and the RPC client. Take structure and field names from the relevant standards to avoid confusion. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 12 ++++++------ fs/nfs/nfs4xdr.c | 21 ++++++++++++--------- include/linux/nfs_xdr.h | 21 +++++---------------- include/linux/sunrpc/gss_api.h | 14 ++++++++++++-- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 5 files changed, 36 insertions(+), 34 deletions(-) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 0dd766079e1c..88231c92317c 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -138,23 +138,23 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; - int i; + unsigned int i; rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo_flavor *flavor; - flavor = &flavors->flavors[i]; + struct nfs4_secinfo4 *flavor = &flavors->flavors[i]; if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { pseudoflavor = flavor->flavor; break; } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->gss.sec_oid4.len; - oid.data = flavor->gss.sec_oid4.data; + oid.len = flavor->flavor_info.oid.len; + oid.data = flavor->flavor_info.oid.data; mech = gss_mech_get_by_OID(&oid); if (!mech) continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); + pseudoflavor = gss_svc_to_pseudoflavor(mech, + flavor->flavor_info.service); gss_mech_put(mech); break; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b744895b9e1..a38fd179c34f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5205,27 +5205,30 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } -static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor) +static int decode_secinfo_gss(struct xdr_stream *xdr, + struct nfs4_secinfo4 *flavor) { + u32 oid_len; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - flavor->gss.sec_oid4.len = be32_to_cpup(p); - if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN) + oid_len = be32_to_cpup(p); + if (oid_len > GSS_OID_MAX_LEN) goto out_err; - p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len); + p = xdr_inline_decode(xdr, oid_len); if (unlikely(!p)) goto out_overflow; - memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len); + memcpy(flavor->flavor_info.oid.data, p, oid_len); + flavor->flavor_info.oid.len = oid_len; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; - flavor->gss.qop4 = be32_to_cpup(p++); - flavor->gss.service = be32_to_cpup(p); + flavor->flavor_info.qop = be32_to_cpup(p++); + flavor->flavor_info.service = be32_to_cpup(p); return 0; @@ -5238,10 +5241,10 @@ out_err: static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) { - struct nfs4_secinfo_flavor *sec_flavor; + struct nfs4_secinfo4 *sec_flavor; + unsigned int i, num_flavors; int status; __be32 *p; - int i, num_flavors; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c1ca1f3f4935..b759467741eb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1049,25 +1049,14 @@ struct nfs4_fs_locations_res { struct nfs4_fs_locations *fs_locations; }; -struct nfs4_secinfo_oid { - unsigned int len; - char data[GSS_OID_MAX_LEN]; -}; - -struct nfs4_secinfo_gss { - struct nfs4_secinfo_oid sec_oid4; - unsigned int qop4; - unsigned int service; -}; - -struct nfs4_secinfo_flavor { - unsigned int flavor; - struct nfs4_secinfo_gss gss; +struct nfs4_secinfo4 { + u32 flavor; + struct rpcsec_gss_info flavor_info; }; struct nfs4_secinfo_flavors { - unsigned int num_flavors; - struct nfs4_secinfo_flavor flavors[0]; + unsigned int num_flavors; + struct nfs4_secinfo4 flavors[0]; }; struct nfs4_secinfo_arg { diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index a19e2547ae6a..98950e5a8877 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -25,10 +25,20 @@ struct gss_ctx { #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) #define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) -#define GSS_C_NULL_OID ((struct xdr_netobj) 0) /*XXX arbitrary length - is this set somewhere? */ #define GSS_OID_MAX_LEN 32 +struct rpcsec_gss_oid { + unsigned int len; + u8 data[GSS_OID_MAX_LEN]; +}; + +/* From RFC 3530 */ +struct rpcsec_gss_info { + struct rpcsec_gss_oid oid; + u32 qop; + u32 service; +}; /* gss-api prototypes; note that these are somewhat simplified versions of * the prototypes specified in RFC 2744. */ @@ -76,7 +86,7 @@ struct pf_desc { struct gss_api_mech { struct list_head gm_list; struct module *gm_owner; - struct xdr_netobj gm_oid; + struct rpcsec_gss_oid gm_oid; char *gm_name; const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index d3611f11a8df..61d36ce3b366 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -754,7 +754,7 @@ MODULE_ALIAS("rpc-auth-gss-390005"); static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, - .gm_oid = {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"}, + .gm_oid = { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }, .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, From 9568c5e9a61de49f67f524404a27a1014a8d7f1e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:43 -0400 Subject: [PATCH 19/67] SUNRPC: Introduce rpcauth_get_pseudoflavor() A SECINFO reply may contain flavors whose kernel module is not yet loaded by the client's kernel. A new RPC client API, called rpcauth_get_pseudoflavor(), is introduced to do proper checking for support of a security flavor. When this API is invoked, the RPC client now tries to load the module for each flavor first before performing the "is this supported?" check. This means if a module is available on the client, but has not been loaded yet, it will be loaded and registered automatically when the SECINFO reply is processed. The new API can take a full GSS tuple (OID, QoP, and service). Previously only the OID and service were considered. nfs_find_best_sec() is updated to verify all flavors requested in a SECINFO reply, including AUTH_NULL and AUTH_UNIX. Previously these two flavors were simply assumed to be supported without consulting the RPC client. Note that the replaced version of nfs_find_best_sec() can return RPC_AUTH_MAXFLAVOR if the server returns a recognized OID but an unsupported "service" value. nfs_find_best_sec() now returns RPC_AUTH_UNIX in this case. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 39 +++++++++++++++------------ include/linux/sunrpc/auth.h | 5 ++++ include/linux/sunrpc/gss_api.h | 5 ++-- net/sunrpc/auth.c | 35 ++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 28 +++++++++++++++---- 6 files changed, 88 insertions(+), 25 deletions(-) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 88231c92317c..cdb0b41a4810 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -134,33 +134,38 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } +/** + * nfs_find_best_sec - Find a security mechanism supported locally + * @flavors: List of security tuples returned by SECINFO procedure + * + * Return the pseudoflavor of the first security mechanism in + * "flavors" that is locally supported. Return RPC_AUTH_UNIX if + * no matching flavor is found in the array. The "flavors" array + * is searched in the order returned from the server, per RFC 3530 + * recommendation. + */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { - struct gss_api_mech *mech; - struct xdr_netobj oid; + rpc_authflavor_t pseudoflavor; + struct nfs4_secinfo4 *secinfo; unsigned int i; - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo4 *flavor = &flavors->flavors[i]; + secinfo = &flavors->flavors[i]; - if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { - pseudoflavor = flavor->flavor; - break; - } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->flavor_info.oid.len; - oid.data = flavor->flavor_info.oid.data; - mech = gss_mech_get_by_OID(&oid); - if (!mech) - continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, - flavor->flavor_info.service); - gss_mech_put(mech); + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + if (pseudoflavor != RPC_AUTH_MAXFLAVOR) + return pseudoflavor; break; } } - return pseudoflavor; + return RPC_AUTH_UNIX; } static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 58fda1c3c783..6851da4cb416 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -22,6 +22,8 @@ /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 +struct rpcsec_gss_info; + /* Work around the lack of a VFS credential */ struct auth_cred { kuid_t uid; @@ -103,6 +105,7 @@ struct rpc_authops { int (*pipes_create)(struct rpc_auth *); void (*pipes_destroy)(struct rpc_auth *); int (*list_pseudoflavors)(rpc_authflavor_t *, int); + rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); }; struct rpc_credops { @@ -137,6 +140,8 @@ int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); +rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, + struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 98950e5a8877..aba7687ca884 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -127,9 +127,8 @@ struct gss_api_ops { int gss_mech_register(struct gss_api_mech *); void gss_mech_unregister(struct gss_api_mech *); -/* returns a mechanism descriptor given an OID, and increments the mechanism's - * reference count. */ -struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); +/* Given a GSS security tuple, look up a pseudoflavor */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *); /* Returns a reference to a mechanism, given a name like "krb5" etc. */ struct gss_api_mech *gss_mech_get_by_name(const char *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f5294047df77..9b81be8d9946 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,6 +123,41 @@ rpcauth_unregister(const struct rpc_authops *ops) } EXPORT_SYMBOL_GPL(rpcauth_unregister); +/** + * rpcauth_get_pseudoflavor - check if security flavor is supported + * @flavor: a security flavor + * @info: a GSS mech OID, quality of protection, and service value + * + * Verifies that an appropriate kernel module is available or already loaded. + * Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is + * not supported locally. + */ +rpc_authflavor_t +rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info) +{ + const struct rpc_authops *ops; + rpc_authflavor_t pseudoflavor; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return RPC_AUTH_MAXFLAVOR; + } + spin_unlock(&rpc_authflavor_lock); + + pseudoflavor = flavor; + if (ops->info2flavor != NULL) + pseudoflavor = ops->info2flavor(info); + + module_put(ops->owner); + return pseudoflavor; +} +EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor); + /** * rpcauth_list_flavors - discover registered flavors and pseudoflavors * @array: array to fill in diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 282dfb14db05..a7420076ef39 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1641,6 +1641,7 @@ static const struct rpc_authops authgss_ops = { .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, + .info2flavor = gss_mech_info2flavor, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index f0f4eee63a35..4db66f5f490e 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -171,8 +171,7 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name) } EXPORT_SYMBOL_GPL(gss_mech_get_by_name); -struct gss_api_mech * -gss_mech_get_by_OID(struct xdr_netobj *obj) +static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) { struct gss_api_mech *pos, *gm = NULL; @@ -188,11 +187,8 @@ gss_mech_get_by_OID(struct xdr_netobj *obj) } spin_unlock(®istered_mechs_lock); return gm; - } -EXPORT_SYMBOL_GPL(gss_mech_get_by_OID); - static inline int mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) { @@ -282,6 +278,28 @@ gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) } EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor); +/** + * gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple + * @info: a GSS mech OID, quality of protection, and service value + * + * Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is + * not supported. + */ +rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) +{ + rpc_authflavor_t pseudoflavor; + struct gss_api_mech *gm; + + gm = gss_mech_get_by_OID(&info->oid); + if (gm == NULL) + return RPC_AUTH_MAXFLAVOR; + + pseudoflavor = gss_svc_to_pseudoflavor(gm, info->service); + + gss_mech_put(gm); + return pseudoflavor; +} + u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { From f783288f0cf47f1988f5284c89e325bd22c74f91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:54:52 -0400 Subject: [PATCH 20/67] SUNRPC: Load GSS kernel module by OID The current GSS mech switch can find and load GSS pseudoflavor modules by name ("krb5") or pseudoflavor number ("390003"), but cannot find GSS modules by GSS tuple: [ "1.2.840.113554.1.2.2", GSS_C_QOP_DEFAULT, RPC_GSS_SVC_NONE ] This is important when dealing with a SECINFO request. A SECINFO reply contains a list of flavors the server supports for the requested export, but GSS flavors also have a GSS tuple that maps to a pseudoflavor (like 390003 for krb5). If the GSS module that supports the OID in the tuple is not loaded, our client is not able to load that module dynamically to support that pseudoflavor. Add a way for the GSS mech switch to load GSS pseudoflavor support by OID before searching for the pseudoflavor that matches the OID and service. Signed-off-by: Chuck Lever Cc: David Howells Signed-off-by: Trond Myklebust --- net/sunrpc/Kconfig | 1 + net/sunrpc/auth_gss/gss_krb5_mech.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 516fe2caac2c..804f4f623ca4 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -3,6 +3,7 @@ config SUNRPC config SUNRPC_GSS tristate + select OID_REGISTRY config SUNRPC_BACKCHANNEL bool diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 61d36ce3b366..b822ec5cdc58 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -750,6 +750,7 @@ MODULE_ALIAS("rpc-auth-gss-krb5p"); MODULE_ALIAS("rpc-auth-gss-390003"); MODULE_ALIAS("rpc-auth-gss-390004"); MODULE_ALIAS("rpc-auth-gss-390005"); +MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2"); static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 4db66f5f490e..92a72404e6d5 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -174,6 +175,12 @@ EXPORT_SYMBOL_GPL(gss_mech_get_by_name); static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) { struct gss_api_mech *pos, *gm = NULL; + char buf[32]; + + if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0) + return NULL; + dprintk("RPC: %s(%s)\n", __func__, buf); + request_module("rpc-auth-gss-%s", buf); spin_lock(®istered_mechs_lock); list_for_each_entry(pos, ®istered_mechs, gm_list) { From 83523d083a045a2069e5f3443d2e4f810a6e6d9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:01 -0400 Subject: [PATCH 21/67] SUNRPC: Consider qop when looking up pseudoflavors The NFSv4 SECINFO operation returns a list of security flavors that the server supports for a particular share. An NFSv4 client is supposed to pick a pseudoflavor it supports that corresponds to one of the flavors returned by the server. GSS flavors in this list have a GSS tuple that identify a specific GSS pseudoflavor. Currently our client ignores the GSS tuple's "qop" value. A matching pseudoflavor is chosen based only on the OID and service value. So far this omission has not had much effect on Linux. The NFSv4 protocol currently supports only one qop value: GSS_C_QOP_DEFAULT, also known as zero. However, if an NFSv4 server happens to return something other than zero in the qop field, our client won't notice. This could cause the client to behave in incorrect ways that could have security implications. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 5 ++++- net/sunrpc/auth_gss/gss_krb5_mech.c | 3 +++ net/sunrpc/auth_gss/gss_mech_switch.c | 20 ++++++++++++++------ net/sunrpc/auth_gss/svcauth_gss.c | 4 +++- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index aba7687ca884..96e5a81a54d7 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -25,6 +25,7 @@ struct gss_ctx { #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) #define GSS_C_NO_CONTEXT ((struct gss_ctx *) 0) +#define GSS_C_QOP_DEFAULT (0) /*XXX arbitrary length - is this set somewhere? */ #define GSS_OID_MAX_LEN 32 @@ -68,12 +69,14 @@ u32 gss_unwrap( u32 gss_delete_sec_context( struct gss_ctx **ctx_id); -u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service); +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 qop, + u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { u32 pseudoflavor; + u32 qop; u32 service; char *name; char *auth_domain_name; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index b822ec5cdc58..33255ff889c0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -729,16 +729,19 @@ static const struct gss_api_ops gss_kerberos_ops = { static struct pf_desc gss_kerberos_pfs[] = { [0] = { .pseudoflavor = RPC_AUTH_GSS_KRB5, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_NONE, .name = "krb5", }, [1] = { .pseudoflavor = RPC_AUTH_GSS_KRB5I, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, [2] = { .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .qop = GSS_C_QOP_DEFAULT, .service = RPC_GSS_SVC_PRIVACY, .name = "krb5p", }, diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 92a72404e6d5..81fb6f3e2424 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -271,19 +271,27 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) return i; } -u32 -gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) +/** + * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor + * @gm: GSS mechanism handle + * @qop: GSS quality-of-protection value + * @service: GSS service value + * + * Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found. + */ +rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop, + u32 service) { int i; for (i = 0; i < gm->gm_pf_num; i++) { - if (gm->gm_pfs[i].service == service) { + if (gm->gm_pfs[i].qop == qop && + gm->gm_pfs[i].service == service) { return gm->gm_pfs[i].pseudoflavor; } } - return RPC_AUTH_MAXFLAVOR; /* illegal value */ + return RPC_AUTH_MAXFLAVOR; } -EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor); /** * gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple @@ -301,7 +309,7 @@ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) if (gm == NULL) return RPC_AUTH_MAXFLAVOR; - pseudoflavor = gss_svc_to_pseudoflavor(gm, info->service); + pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service); gss_mech_put(gm); return pseudoflavor; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f7d34e7b6f81..74f6d30f5ded 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1216,7 +1216,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svcdata->rsci = rsci; cache_get(&rsci->h); rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( - rsci->mechctx->mech_type, gc->gc_svc); + rsci->mechctx->mech_type, + GSS_C_QOP_DEFAULT, + gc->gc_svc); ret = SVC_OK; goto out; } From a77c806fb9d097bb7733b64207cf52fc2c6438bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:10 -0400 Subject: [PATCH 22/67] SUNRPC: Refactor nfsd4_do_encode_secinfo() Clean up. This matches a similar API for the client side, and keeps ULP fingers out the of the GSS mech switch. Signed-off-by: Chuck Lever Acked-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4xdr.c | 24 +++++++++--------- include/linux/sunrpc/auth.h | 4 +++ include/linux/sunrpc/gss_api.h | 3 +++ net/sunrpc/auth.c | 35 +++++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 1 + net/sunrpc/auth_gss/gss_mech_switch.c | 35 +++++++++++++++++++++++++-- 6 files changed, 87 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 01168865dd37..2a2745615b42 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3138,10 +3138,9 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, - __be32 nfserr,struct svc_export *exp) + __be32 nfserr, struct svc_export *exp) { - int i = 0; - u32 nflavs; + u32 i, nflavs; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; __be32 *p; @@ -3172,30 +3171,29 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, WRITE32(nflavs); ADJUST_ARGS(); for (i = 0; i < nflavs; i++) { - u32 flav = flavs[i].pseudoflavor; - struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); + struct rpcsec_gss_info info; - if (gm) { + if (rpcauth_get_gssinfo(flavs[i].pseudoflavor, &info) == 0) { RESERVE_SPACE(4); WRITE32(RPC_AUTH_GSS); ADJUST_ARGS(); - RESERVE_SPACE(4 + gm->gm_oid.len); - WRITE32(gm->gm_oid.len); - WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); + RESERVE_SPACE(4 + info.oid.len); + WRITE32(info.oid.len); + WRITEMEM(info.oid.data, info.oid.len); ADJUST_ARGS(); RESERVE_SPACE(4); - WRITE32(0); /* qop */ + WRITE32(info.qop); ADJUST_ARGS(); RESERVE_SPACE(4); - WRITE32(gss_pseudoflavor_to_service(gm, flav)); + WRITE32(info.service); ADJUST_ARGS(); - gss_mech_put(gm); } else { RESERVE_SPACE(4); - WRITE32(flav); + WRITE32(flavs[i].pseudoflavor); ADJUST_ARGS(); } } + out: if (exp) exp_put(exp); diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 6851da4cb416..0dd00f4f6810 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -106,6 +106,8 @@ struct rpc_authops { void (*pipes_destroy)(struct rpc_auth *); int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); + int (*flavor2info)(rpc_authflavor_t, + struct rpcsec_gss_info *); }; struct rpc_credops { @@ -142,6 +144,8 @@ struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); +int rpcauth_get_gssinfo(rpc_authflavor_t, + struct rpcsec_gss_info *); int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 96e5a81a54d7..fca23380e667 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -133,6 +133,9 @@ void gss_mech_unregister(struct gss_api_mech *); /* Given a GSS security tuple, look up a pseudoflavor */ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *); +/* Given a pseudoflavor, look up a GSS security tuple */ +int gss_mech_flavor2info(rpc_authflavor_t, struct rpcsec_gss_info *); + /* Returns a reference to a mechanism, given a name like "krb5" etc. */ struct gss_api_mech *gss_mech_get_by_name(const char *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9b81be8d9946..2bc0cc2196e0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -158,6 +158,41 @@ rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info) } EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor); +/** + * rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. + */ +int +rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) +{ + rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor); + const struct rpc_authops *ops; + int result; + + ops = auth_flavors[flavor]; + if (ops == NULL) + request_module("rpc-auth-%u", flavor); + spin_lock(&rpc_authflavor_lock); + ops = auth_flavors[flavor]; + if (ops == NULL || !try_module_get(ops->owner)) { + spin_unlock(&rpc_authflavor_lock); + return -ENOENT; + } + spin_unlock(&rpc_authflavor_lock); + + result = -ENOENT; + if (ops->flavor2info != NULL) + result = ops->flavor2info(pseudoflavor, info); + + module_put(ops->owner); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo); + /** * rpcauth_list_flavors - discover registered flavors and pseudoflavors * @array: array to fill in diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a7420076ef39..51415b07174e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1642,6 +1642,7 @@ static const struct rpc_authops authgss_ops = { .pipes_destroy = gss_pipes_dentries_destroy, .list_pseudoflavors = gss_mech_list_pseudoflavors, .info2flavor = gss_mech_info2flavor, + .flavor2info = gss_mech_flavor2info, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 81fb6f3e2424..deaa7ae81cdf 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -240,8 +240,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); - /** * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors * @array: array to fill in @@ -315,6 +313,39 @@ rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info) return pseudoflavor; } +/** + * gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor + * @pseudoflavor: GSS pseudoflavor to match + * @info: rpcsec_gss_info structure to fill in + * + * Returns zero and fills in "info" if pseudoflavor matches a + * supported mechanism. Otherwise a negative errno is returned. + */ +int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor, + struct rpcsec_gss_info *info) +{ + struct gss_api_mech *gm; + int i; + + gm = gss_mech_get_by_pseudoflavor(pseudoflavor); + if (gm == NULL) + return -ENOENT; + + for (i = 0; i < gm->gm_pf_num; i++) { + if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) { + memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len); + info->oid.len = gm->gm_oid.len; + info->qop = gm->gm_pfs[i].qop; + info->service = gm->gm_pfs[i].service; + gss_mech_put(gm); + return 0; + } + } + + gss_mech_put(gm); + return -ENOENT; +} + u32 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) { From 6599c0acae10e929b5315821c1d064cd13fe7648 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:19 -0400 Subject: [PATCH 23/67] SUNRPC: Make gss_mech_get() static gss_mech_get() is no longer used outside of gss_mech_switch.c. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 3 --- net/sunrpc/auth_gss/gss_mech_switch.c | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index fca23380e667..f32b7a47e13f 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -145,9 +145,6 @@ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); /* Fill in an array with a list of supported pseudoflavors */ int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); -/* Just increments the mechanism's reference count and returns its input: */ -struct gss_api_mech * gss_mech_get(struct gss_api_mech *); - /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index deaa7ae81cdf..89416522ef79 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -132,15 +132,12 @@ gss_mech_unregister(struct gss_api_mech *gm) EXPORT_SYMBOL_GPL(gss_mech_unregister); -struct gss_api_mech * -gss_mech_get(struct gss_api_mech *gm) +static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get); - static struct gss_api_mech * _gss_mech_get_by_name(const char *name) { From 5007220b87d2ae9da1ab2025bfa35f6e2b3376f5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:27 -0400 Subject: [PATCH 24/67] SUNRPC: Remove EXPORT_SYMBOL_GPL() from GSS mech switch Clean up: Reduce the symbol table footprint for auth_rpcgss.ko by removing exported symbols for functions that are no longer used outside of auth_rpcgss.ko. The remaining two EXPORTs in gss_mech_switch.c get documenting comments. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_mech_switch.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 89416522ef79..79881d6e68a1 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -103,8 +103,13 @@ out: return status; } -int -gss_mech_register(struct gss_api_mech *gm) +/** + * gss_mech_register - register a GSS mechanism + * @gm: GSS mechanism handle + * + * Returns zero if successful, or a negative errno. + */ +int gss_mech_register(struct gss_api_mech *gm) { int status; @@ -117,11 +122,14 @@ gss_mech_register(struct gss_api_mech *gm) dprintk("RPC: registered gss mechanism %s\n", gm->gm_name); return 0; } - EXPORT_SYMBOL_GPL(gss_mech_register); -void -gss_mech_unregister(struct gss_api_mech *gm) +/** + * gss_mech_unregister - release a GSS mechanism + * @gm: GSS mechanism handle + * + */ +void gss_mech_unregister(struct gss_api_mech *gm) { spin_lock(®istered_mechs_lock); list_del(&gm->gm_list); @@ -129,7 +137,6 @@ gss_mech_unregister(struct gss_api_mech *gm) dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name); gss_mech_free(gm); } - EXPORT_SYMBOL_GPL(gss_mech_unregister); static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) @@ -167,7 +174,6 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name) } return gm; } -EXPORT_SYMBOL_GPL(gss_mech_get_by_name); static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj) { @@ -355,8 +361,6 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) return 0; } -EXPORT_SYMBOL_GPL(gss_pseudoflavor_to_service); - char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) { @@ -369,8 +373,6 @@ gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) return NULL; } -EXPORT_SYMBOL_GPL(gss_service_to_auth_domain_name); - void gss_mech_put(struct gss_api_mech * gm) { @@ -378,8 +380,6 @@ gss_mech_put(struct gss_api_mech * gm) module_put(gm->gm_owner); } -EXPORT_SYMBOL_GPL(gss_mech_put); - /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. */ int From 75bc8821bd9ae76f2ffb92a46f7abdbb0aaa0536 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:36 -0400 Subject: [PATCH 25/67] NFS: Handle missing rpc.gssd when looking up root FH When rpc.gssd is not running, any NFS operation that needs to use a GSS security flavor of course does not work. If looking up a server's root file handle results in an NFS4ERR_WRONGSEC, nfs4_find_root_sec() is called to try a bunch of security flavors until one works or all reasonable flavors have been tried. When rpc.gssd isn't running, this loop seems to fail immediately after rpcauth_create() craps out on the first GSS flavor. When the rpcauth_create() call in nfs4_lookup_root_sec() fails because rpc.gssd is not available, nfs4_lookup_root_sec() unconditionally returns -EIO. This prevents nfs4_find_root_sec() from retrying any other flavors; it drops out of its loop and fails immediately. Having nfs4_lookup_root_sec() return -EACCES instead allows nfs4_find_root_sec() to try all flavors in its list. Signed-off-by: Chuck Lever Cc: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 81343944e096..f462dd884385 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2506,7 +2506,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl auth = rpcauth_create(flavor, server->client); if (IS_ERR(auth)) { - ret = -EIO; + ret = -EACCES; goto out; } ret = nfs4_lookup_root(server, fhandle, info); From 2ed4b95b7ee5e5bcf77576ce5df4d7036584cd3c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:45 -0400 Subject: [PATCH 26/67] NFS: Clean up nfs4_proc_get_rootfh The long lines with no vertical white space make this function difficult for humans to read. Add a proper documenting comment while we're here. Signed-off-by: Chuck Lever Cc: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f462dd884385..173c988177e3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2547,24 +2547,36 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -/* - * get the file handle for the "/" directory on the server +static int nfs4_do_find_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fsinfo *info) +{ + int mv = server->nfs_client->cl_minorversion; + return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info); +} + +/** + * nfs4_proc_get_rootfh - get file handle for server's pseudoroot + * @server: initialized nfs_server handle + * @fhandle: we fill in the pseudo-fs root file handle + * @info: we fill in an FSINFO struct + * + * Returns zero on success, or a negative errno. */ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { - int minor_version = server->nfs_client->cl_minorversion; - int status = nfs4_lookup_root(server, fhandle, info); - if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) - /* - * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM - * by nfs4_map_errors() as this function exits. - */ - status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); + int status; + + status = nfs4_lookup_root(server, fhandle, info); + if ((status == -NFS4ERR_WRONGSEC) && + !(server->flags & NFS_MOUNT_SECFLAVOUR)) + status = nfs4_do_find_root_sec(server, fhandle, info); + if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); + return nfs4_map_errors(status); } From 83ca7f5ab31f57506bb35482e4b0426be653196a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:55:53 -0400 Subject: [PATCH 27/67] NFS: Avoid PUTROOTFH when managing leases Currently, the compound operation the Linux NFS client sends to the server to confirm a client ID looks like this: { SETCLIENTID_CONFIRM; PUTROOTFH; GETATTR(lease_time) } Once the lease is confirmed, it makes sense to know how long before the client will have to renew it. And, performing these operations in the same compound saves a round trip. Unfortunately, this arrangement assumes that the security flavor used for establishing a client ID can also be used to access the server's pseudo-fs. If the server requires a different security flavor to access its pseudo-fs than it allowed for the client's SETCLIENTID operation, the PUTROOTFH in this compound fails with NFS4ERR_WRONGSEC. Even though the SETCLIENTID_CONFIRM succeeded, our client's trunking detection logic interprets the failure of the compound as a failure by the server to confirm the client ID. As part of server trunking detection, the client then begins another SETCLIENTID pass with the same nfs4_client_id. This fails with NFS4ERR_CLID_INUSE because the first SETCLIENTID/SETCLIENTID_CONFIRM already succeeded in confirming that client ID -- it was the PUTROOTFH operation that caused the SETCLIENTID_CONFIRM compound to fail. To address this issue, separate the "establish client ID" step from the "accessing the server's pseudo-fs root" step. The first access of the server's pseudo-fs may require retrying the PUTROOTFH operation with different security flavors. This access is done in nfs4_proc_get_rootfh(). That leaves the matter of how to retrieve the server's lease time. nfs4_proc_fsinfo() already retrieves the lease time value, though none of its callers do anything with the retrieved value (nor do they mark the lease as "renewed"). Note that NFSv4.1 state recovery invokes nfs4_proc_get_lease_time() using the lease management security flavor. This may cause some heartburn if that security flavor isn't the same as the security flavor the server requires for accessing the pseudo-fs. Signed-off-by: Chuck Lever Cc: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 25 ++++++++++++------------- fs/nfs/nfs4xdr.c | 18 +++--------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 173c988177e3..510a7dd84c46 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3455,12 +3455,21 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { struct nfs4_exception exception = { }; + unsigned long now = jiffies; int err; do { - err = nfs4_handle_exception(server, - _nfs4_do_fsinfo(server, fhandle, fsinfo), - &exception); + err = _nfs4_do_fsinfo(server, fhandle, fsinfo); + if (err == 0) { + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo->lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + break; + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -4301,27 +4310,17 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) { - struct nfs_fsinfo fsinfo; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], .rpc_argp = arg, - .rpc_resp = &fsinfo, .rpc_cred = cred, }; - unsigned long now; int status; dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - now = jiffies; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (status == 0) { - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = now; - spin_unlock(&clp->cl_lock); - } dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a38fd179c34f..ae0190b82694 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -530,14 +530,10 @@ static int nfs4_stat_to_errno(int); decode_setclientid_maxsz) #define NFS4_enc_setclientid_confirm_sz \ (compound_encode_hdr_maxsz + \ - encode_setclientid_confirm_maxsz + \ - encode_putrootfh_maxsz + \ - encode_fsinfo_maxsz) + encode_setclientid_confirm_maxsz) #define NFS4_dec_setclientid_confirm_sz \ (compound_decode_hdr_maxsz + \ - decode_setclientid_confirm_maxsz + \ - decode_putrootfh_maxsz + \ - decode_fsinfo_maxsz) + decode_setclientid_confirm_maxsz) #define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -2608,12 +2604,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, struct compound_hdr hdr = { .nops = 0, }; - const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; encode_compound_hdr(xdr, req, &hdr); encode_setclientid_confirm(xdr, arg, &hdr); - encode_putrootfh(xdr, &hdr); - encode_fsinfo(xdr, lease_bitmap, &hdr); encode_nops(&hdr); } @@ -6647,8 +6640,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, * Decode SETCLIENTID_CONFIRM response */ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs_fsinfo *fsinfo) + struct xdr_stream *xdr) { struct compound_hdr hdr; int status; @@ -6656,10 +6648,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, status = decode_compound_hdr(xdr, &hdr); if (!status) status = decode_setclientid_confirm(xdr); - if (!status) - status = decode_putrootfh(xdr); - if (!status) - status = decode_fsinfo(xdr, fsinfo); return status; } From 9a744ba3983698307a7690541c00549ac7edf316 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:56:02 -0400 Subject: [PATCH 28/67] NFS: Use static list of security flavors during root FH lookup recovery If the Linux NFS client receives an NFS4ERR_WRONGSEC error while trying to look up an NFS server's root file handle, it retries the lookup operation with various security flavors to see what flavor the NFS server will accept for pseudo-fs access. The list of flavors the client uses during retry consists only of flavors that are currently registered in the kernel RPC client. This list may not include any GSS pseudoflavors if auth_rpcgss.ko has not yet been loaded. Let's instead use a static list of security flavors that the NFS standard requires the server to implement (RFC 3530bis, section 3.2.1). The RPC client should now be able to load support for these dynamically; if not, they are skipped. Recovery behavior here is prescribed by RFC 3530bis, section 15.33.5: > For LOOKUPP, PUTROOTFH and PUTPUBFH, the client will be unable to > use the SECINFO operation since SECINFO requires a current > filehandle and none exist for these two [sic] operations. Therefore, > the client must iterate through the security triples available at > the client and reattempt the PUTROOTFH or PUTPUBFH operation. In > the unfortunate event none of the MANDATORY security triples are > supported by the client and server, the client SHOULD try using > others that support integrity. Failing that, the client can try > using AUTH_NONE, but because such forms lack integrity checks, > this puts the client at risk. Signed-off-by: Chuck Lever Cc: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 510a7dd84c46..dd3a43792df0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2514,27 +2514,35 @@ out: return ret; } +/* + * Retry pseudoroot lookup with various security flavors. We do this when: + * + * NFSv4.0: the PUTROOTFH operation returns NFS4ERR_WRONGSEC + * NFSv4.1: the server does not support the SECINFO_NO_NAME operation + * + * Returns zero on success, or a negative NFS4ERR value, or a + * negative errno value. + */ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { - int i, len, status = 0; - rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; - - len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); - if (len < 0) - return len; - - for (i = 0; i < len; i++) { - /* AUTH_UNIX is the default flavor if none was specified, - * thus has already been tried. */ - if (flav_array[i] == RPC_AUTH_UNIX) - continue; + /* Per 3530bis 15.33.5 */ + static const rpc_authflavor_t flav_array[] = { + RPC_AUTH_GSS_KRB5P, + RPC_AUTH_GSS_KRB5I, + RPC_AUTH_GSS_KRB5, + RPC_AUTH_NULL, + }; + int status = -EPERM; + size_t i; + for (i = 0; i < ARRAY_SIZE(flav_array); i++) { status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; break; } + /* * -EACCESS could mean that the user doesn't have correct permissions * to access the mount. It could also mean that we tried to mount From c4eafe1135809c2b35b873a395af8f3a86a3ee98 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:56:11 -0400 Subject: [PATCH 29/67] NFS: Try AUTH_UNIX when PUTROOTFH gets NFS4ERR_WRONGSEC Most NFSv4 servers implement AUTH_UNIX, and administrators will prefer this over AUTH_NULL. It is harmless for our client to try this flavor in addition to the flavors mandated by RFC 3530/5661. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dd3a43792df0..a8e6d802383b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2531,6 +2531,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, RPC_AUTH_GSS_KRB5P, RPC_AUTH_GSS_KRB5I, RPC_AUTH_GSS_KRB5, + RPC_AUTH_UNIX, /* courtesy */ RPC_AUTH_NULL, }; int status = -EPERM; From 4edaa308888b4bd629fa025cc6d5b2bf1a2a51db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:56:20 -0400 Subject: [PATCH 30/67] NFS: Use "krb5i" to establish NFSv4 state whenever possible Currently our client uses AUTH_UNIX for state management on Kerberos NFS mounts in some cases. For example, if the first mount of a server specifies "sec=sys," the SETCLIENTID operation is performed with AUTH_UNIX. Subsequent mounts using stronger security flavors can not change the flavor used for lease establishment. This might be less security than an administrator was expecting. Dave Noveck's migration issues draft recommends the use of an integrity-protecting security flavor for the SETCLIENTID operation. Let's ignore the mount's sec= setting and use krb5i as the default security flavor for SETCLIENTID. If our client can't establish a GSS context (eg. because it doesn't have a keytab or the server doesn't support Kerberos) we fall back to using AUTH_NULL. For an operation that requires a machine credential (which never represents a particular user) AUTH_NULL is as secure as AUTH_UNIX. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- fs/nfs/nfs4state.c | 37 +++++-------------------------------- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 17b34b2da2df..bb9789ef1d49 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -199,7 +199,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, clp->rpc_ops = &nfs_v4_clientops; __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - error = nfs_create_rpc_client(clp, timeparms, authflavour); + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); if (error < 0) goto error; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b924bdd69494..a30f51eb048f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1866,26 +1866,13 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, { const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; - rpc_authflavor_t *flavors, flav, save; struct rpc_clnt *clnt; struct rpc_cred *cred; - int i, len, status; + int i, status; dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); - len = NFS_MAX_SECFLAVORS; - flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); - if (flavors == NULL) { - status = -ENOMEM; - goto out; - } - len = rpcauth_list_flavors(flavors, len); - if (len < 0) { - status = len; - goto out_free; - } clnt = clp->cl_rpcclient; - save = clnt->cl_auth->au_flavor; i = 0; mutex_lock(&nfs_clid_init_mutex); @@ -1900,12 +1887,6 @@ again: switch (status) { case 0: break; - - case -EACCES: - if (clp->cl_machine_cred == NULL) - break; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: case -ETIMEDOUT: case -EAGAIN: @@ -1914,17 +1895,12 @@ again: dprintk("NFS: %s after status %d, retrying\n", __func__, status); goto again; - + case -EACCES: + if (i++) + break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - status = -EPERM; - if (i >= len) - break; - - flav = flavors[i++]; - if (flav == save) - flav = flavors[i++]; - clnt = rpc_clone_client_set_auth(clnt, flav); + clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_NULL); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); break; @@ -1944,9 +1920,6 @@ again: out_unlock: mutex_unlock(&nfs_clid_init_mutex); -out_free: - kfree(flavors); -out: dprintk("NFS: %s: status = %d\n", __func__, status); return status; } From 1c74a244fcb61e6e1983d5725b8ccd5d3f51889c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Mar 2013 12:53:08 -0400 Subject: [PATCH 31/67] SUNRPC: Don't recognize RPC_AUTH_MAXFLAVOR RPC_AUTH_MAXFLAVOR is an invalid flavor, on purpose. Don't allow any processing whatsoever if a caller passes it to rpcauth_create() or rpcauth_get_gssinfo(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2bc0cc2196e0..ed2fdd210c0b 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -82,7 +82,7 @@ MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); static u32 pseudoflavor_to_flavor(u32 flavor) { - if (flavor >= RPC_AUTH_MAXFLAVOR) + if (flavor > RPC_AUTH_MAXFLAVOR) return RPC_AUTH_GSS; return flavor; } @@ -173,6 +173,9 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) const struct rpc_authops *ops; int result; + if (flavor >= RPC_AUTH_MAXFLAVOR) + return -EINVAL; + ops = auth_flavors[flavor]; if (ops == NULL) request_module("rpc-auth-%u", flavor); From 4580a92d44e2b21c2254fa5fef0f1bfb43c82318 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Mar 2013 12:53:17 -0400 Subject: [PATCH 32/67] NFS: Use server-recommended security flavor by default (NFSv3) Since commit ec88f28d in 2009, checking if the user-specified flavor is in the server's flavor list has been the source of a few noticeable regressions (now fixed), but there is one that is still vexing. An NFS server can list AUTH_NULL in its flavor list, which suggests a client should try to mount the server with the flavor of the client's choice, but the server will squash all accesses. In some cases, our client fails to mount a server because of this check, when the mount could have proceeded successfully. Skip this check if the user has specified "sec=" on the mount command line. But do consult the server-provided flavor list to choose a security flavor if no sec= option is specified on the mount command. If a server lists Kerberos pseudoflavors before "sys" in its export options, our client now chooses Kerberos over AUTH_UNIX for mount points, when no security flavor is specified by the mount command. This could be surprising to some administrators or users, who would then need to have Kerberos credentials to access the export. Or, a client administrator may not have enabled rpc.gssd. In this case, auth_rpcgss.ko might still be loadable, which is enough for the new logic to choose Kerberos over AUTH_UNIX. But the mount would fail since no GSS context can be created without rpc.gssd running. To retain the use of AUTH_UNIX by default: o The server administrator can ensure that "sys" is listed before Kerberos flavors in its export security options (see exports(5)), o The client administrator can explicitly specify "sec=sys" on its mount command line (see nfs(5)), o The client administrator can use "Sec=sys" in an appropriate section of /etc/nfsmount.conf (see nfsmount.conf(5)), or o The client administrator can blacklist auth_rpcgss.ko. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4super.c | 2 ++ fs/nfs/super.c | 80 ++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 569b166cc050..a5e1a3026d48 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -252,6 +252,8 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + if (data->auth_flavors[0] == RPC_AUTH_MAXFLAVOR) + data->auth_flavors[0] = RPC_AUTH_UNIX; export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 17b32b722457..3bb8318f6d0c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -917,7 +917,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->mount_server.port = NFS_UNSPEC_PORT; data->nfs_server.port = NFS_UNSPEC_PORT; data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->auth_flavors[0] = RPC_AUTH_UNIX; + data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR; data->auth_flavor_len = 1; data->minorversion = 0; data->need_mount = true; @@ -1605,49 +1605,57 @@ out_security_failure: } /* - * Match the requested auth flavors with the list returned by - * the server. Returns zero and sets the mount's authentication - * flavor on success; returns -EACCES if server does not support - * the requested flavor. + * Select a security flavor for this mount. The selected flavor + * is planted in args->auth_flavors[0]. */ -static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, - struct nfs_mount_request *request) +static void nfs_select_flavor(struct nfs_parsed_mount_data *args, + struct nfs_mount_request *request) { - unsigned int i, j, server_authlist_len = *(request->auth_flav_len); + unsigned int i, count = *(request->auth_flav_len); + rpc_authflavor_t flavor; + + if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) + goto out; + + /* + * The NFSv2 MNT operation does not return a flavor list. + */ + if (args->mount_server.version != NFS_MNT3_VERSION) + goto out_default; /* * Certain releases of Linux's mountd return an empty - * flavor list. To prevent behavioral regression with - * these servers (ie. rejecting mounts that used to - * succeed), revert to pre-2.6.32 behavior (no checking) - * if the returned flavor list is empty. + * flavor list in some cases. */ - if (server_authlist_len == 0) - return 0; + if (count == 0) + goto out_default; /* - * We avoid sophisticated negotiating here, as there are - * plenty of cases where we can get it wrong, providing - * either too little or too much security. - * * RFC 2623, section 2.7 suggests we SHOULD prefer the * flavor listed first. However, some servers list - * AUTH_NULL first. Our caller plants AUTH_SYS, the - * preferred default, in args->auth_flavors[0] if user - * didn't specify sec= mount option. + * AUTH_NULL first. Avoid ever choosing AUTH_NULL. */ - for (i = 0; i < args->auth_flavor_len; i++) - for (j = 0; j < server_authlist_len; j++) - if (args->auth_flavors[i] == request->auth_flavs[j]) { - dfprintk(MOUNT, "NFS: using auth flavor %d\n", - request->auth_flavs[j]); - args->auth_flavors[0] = request->auth_flavs[j]; - return 0; - } + for (i = 0; i < count; i++) { + struct rpcsec_gss_info info; - dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); - nfs_umount(request); - return -EACCES; + flavor = request->auth_flavs[i]; + switch (flavor) { + case RPC_AUTH_UNIX: + goto out_set; + case RPC_AUTH_NULL: + continue; + default: + if (rpcauth_get_gssinfo(flavor, &info) == 0) + goto out_set; + } + } + +out_default: + flavor = RPC_AUTH_UNIX; +out_set: + args->auth_flavors[0] = flavor; +out: + dfprintk(MOUNT, "NFS: using auth flavor %d\n", args->auth_flavors[0]); } /* @@ -1710,12 +1718,8 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return status; } - /* - * MNTv1 (NFSv2) does not support auth flavor negotiation. - */ - if (args->mount_server.version != NFS_MNT3_VERSION) - return 0; - return nfs_walk_authlist(args, &request); + nfs_select_flavor(args, &request); + return 0; } struct dentry *nfs_try_mount(int flags, const char *dev_name, From 23631227a672de5e81e167c3178b39e9a50f6541 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Apr 2013 16:14:11 -0400 Subject: [PATCH 33/67] NFSv4: Fix the fallback to AUTH_NULL if krb5i is not available If the rpcsec_gss_krb5 module cannot be loaded, the attempt to create an rpc_client in nfs4_init_client will currently fail with an EINVAL. Fix is to retry with AUTH_NULL. Regression introduced by the commit "NFS: Use "krb5i" to establish NFSv4 state whenever possible" Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Bryan Schumaker --- fs/nfs/nfs4client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index bb9789ef1d49..a4f2100fd9c2 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -200,6 +200,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); + if (error == -EINVAL) + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_NULL); if (error < 0) goto error; From ea33e6c3e79d23ef693b7ed3171ef90dddfbfc15 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 13:22:50 -0400 Subject: [PATCH 34/67] NFSv4: Fix issues in nfs4_discover_server_trunking - Ensure that we exit with ENOENT if the call to ops->get_clid_cred() fails. - Handle the case where ops->detect_trunking() exits with an unexpected error, and return EIO. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a30f51eb048f..209df6976a5b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1876,8 +1876,8 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, i = 0; mutex_lock(&nfs_clid_init_mutex); - status = -ENOENT; again: + status = -ENOENT; cred = ops->get_clid_cred(clp); if (cred == NULL) goto out_unlock; @@ -1916,6 +1916,11 @@ again: case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ status = -EKEYEXPIRED; + break; + default: + pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n", + __func__, status); + status = -EIO; } out_unlock: From 845cbceb22c67030df76552892ad4935669bf2e5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 15:37:04 -0400 Subject: [PATCH 35/67] NFSv4: Don't clear the machine cred when client establish returns EACCES The expected behaviour is that the client will decide at mount time whether or not to use a krb5i machine cred, or AUTH_NULL. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Bryan Schumaker --- fs/nfs/nfs4state.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 209df6976a5b..c54600962bc6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -154,18 +154,6 @@ struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) return cred; } -static void nfs4_clear_machine_cred(struct nfs_client *clp) -{ - struct rpc_cred *cred; - - spin_lock(&clp->cl_lock); - cred = clp->cl_machine_cred; - clp->cl_machine_cred = NULL; - spin_unlock(&clp->cl_lock); - if (cred != NULL) - put_rpccred(cred); -} - static struct rpc_cred * nfs4_get_renew_cred_server_locked(struct nfs_server *server) { @@ -1768,10 +1756,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return -EPERM; case -EACCES: - if (clp->cl_machine_cred == NULL) - return -EACCES; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: case -ETIMEDOUT: case -EAGAIN: From 25d280aad82e559cc7bc762410cd35ea5b98e294 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 25 Mar 2013 07:59:57 -0400 Subject: [PATCH 36/67] nfs: allow the v4.1 callback thread to freeze The v4.1 callback thread has set_freezable() at the top, but it doesn't ever try to freeze within the loop. Have it call try_to_freeze() at the top of the loop. If a freeze event occurs, recheck kthread_should_stop() after thawing. Reported-by: Yanchuan Nian Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 5088b57b078a..cff089a412c7 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -125,6 +125,9 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { From cb6071874703565762c190e1e7ebde762e525063 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sat, 9 Mar 2013 17:02:31 +0100 Subject: [PATCH 37/67] sunrpc: drop "select NETVM" The Kconfig entry for SUNRPC_SWAP selects NETVM. That select statement was added in commit a564b8f0398636ba30b07c0eaebdef7ff7837249 ("nfs: enable swap on NFS"). But there's no Kconfig symbol NETVM. It apparently was only in used in development versions of the swap over nfs functionality but never entered mainline. Anyhow, it is a nop and can safely be dropped. Signed-off-by: Paul Bolle Signed-off-by: Trond Myklebust --- net/sunrpc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 516fe2caac2c..262caf03bd5f 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -24,7 +24,6 @@ config SUNRPC_XPRT_RDMA config SUNRPC_SWAP bool depends on SUNRPC - select NETVM config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" From dbb21c25a35a71baf413f5176f028ee11b88cfbc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 14:27:29 -0400 Subject: [PATCH 38/67] NFSv4: Handle NFS4ERR_DELAY and NFS4ERR_GRACE in nfs4_lock_delegation_recall A server shouldn't normally return NFS4ERR_GRACE if the client holds a delegation, since no conflicting lock reclaims can be granted, however the spec does not require the server to grant the lock in this instance. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 81343944e096..dc4037c52cf7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5176,6 +5176,12 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) nfs4_schedule_stateid_recovery(server, state); err = 0; goto out; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + err = -EAGAIN; + goto out; case -ENOMEM: case -NFS4ERR_DENIED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ From 8b6cc4d6f841d31f72fe7478453759166d366274 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 15:34:05 -0400 Subject: [PATCH 39/67] NFSv4: Handle NFS4ERR_DELAY and NFS4ERR_GRACE in nfs4_open_delegation_recall A server shouldn't normally return NFS4ERR_GRACE if the client holds a delegation, since no conflicting lock reclaims can be granted, however the spec does not require the server to grant the open in this instance Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dc4037c52cf7..6630db1336a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1432,6 +1432,12 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOMEM: err = 0; goto out; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + err = -EAGAIN; + goto out; } set_bit(NFS_DELEGATED_STATE, &state->flags); err = nfs4_handle_exception(server, err, &exception); From 4a706fa09fd59b9c73b2a933953d27f3e18619c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 14:47:22 -0400 Subject: [PATCH 40/67] NFSv4: Clean up nfs4_lock_delegation_recall All error cases are handled by the switch() statement, meaning that the call to nfs4_handle_exception() is unreachable. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 86 +++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6630db1336a9..5f2010e0926e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5144,59 +5144,49 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; int err; err = nfs4_set_lock_state(state, fl); if (err != 0) goto out; - do { - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - switch (err) { - default: - printk(KERN_ERR "NFS: %s: unhandled error " - "%d.\n", __func__, err); - case 0: - case -ESTALE: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: - nfs4_schedule_lease_recovery(server->nfs_client); - err = -EAGAIN; - goto out; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - err = -EAGAIN; - goto out; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - nfs4_schedule_stateid_recovery(server, state); - err = 0; - goto out; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); - ssleep(1); - err = -EAGAIN; - goto out; - case -ENOMEM: - case -NFS4ERR_DENIED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - err = 0; - goto out; - } - set_bit(NFS_DELEGATED_STATE, &state->flags); - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + switch (err) { + default: + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); + case 0: + case -ESTALE: + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + set_bit(NFS_DELEGATED_STATE, &state->flags); + case -NFS4ERR_EXPIRED: + nfs4_schedule_lease_recovery(server->nfs_client); + return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); + return -EAGAIN; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs4_schedule_stateid_recovery(server, state); + return 0; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + return -EAGAIN; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + return 0; + } out: return err; } From be76b5b68d15861d50ccd1d3e66dac3cb19963dd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 15:40:44 -0400 Subject: [PATCH 41/67] NFSv4: Clean up nfs4_open_delegation_recall Make it symmetric with nfs4_lock_delegation_recall Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 96 ++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5f2010e0926e..57b5fb778f16 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1379,70 +1379,56 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; - int ret; + int err; opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - ret = nfs4_open_recover(opendata, state); + err = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); - return ret; -} - -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) -{ - struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(state->inode); - int err; - do { - err = _nfs4_open_delegation_recall(ctx, state, stateid); - switch (err) { - case 0: - case -ENOENT: - case -ESTALE: - goto out; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - err = -EAGAIN; - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: - /* Don't recall a delegation if it was lost */ - nfs4_schedule_lease_recovery(server->nfs_client); - err = -EAGAIN; - goto out; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - nfs_inode_find_state_and_recover(state->inode, - stateid); - nfs4_schedule_stateid_recovery(server, state); - case -ENOMEM: - err = 0; - goto out; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); - ssleep(1); - err = -EAGAIN; - goto out; - } - set_bit(NFS_DELEGATED_STATE, &state->flags); - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); -out: + switch (err) { + default: + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); + case 0: + case -ENOENT: + case -ESTALE: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); + return -EAGAIN; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + set_bit(NFS_DELEGATED_STATE, &state->flags); + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_lease_recovery(server->nfs_client); + return -EAGAIN; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + nfs_inode_find_state_and_recover(state->inode, + stateid); + nfs4_schedule_stateid_recovery(server, state); + case -ENOMEM: + return 0; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + return -EAGAIN; + } return err; } From db4f2e637f6d5dff6d07112e62605275be6050b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 15:56:46 -0400 Subject: [PATCH 42/67] NFSv4: Clean up delegation recall error handling Unify the error handling in nfs4_open_delegation_recall and nfs4_lock_delegation_recall. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 ++-- fs/nfs/delegation.h | 2 +- fs/nfs/nfs4proc.c | 79 ++++++++++++++------------------------------- 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6390a4b5fee7..fc8a213497a1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -64,7 +64,7 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) return ret; } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) +static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -83,7 +83,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ if (nfs_file_open_context(fl->fl_file) != ctx) continue; unlock_flocks(); - status = nfs4_lock_delegation_recall(state, fl); + status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; lock_flocks(); @@ -120,7 +120,7 @@ again: seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid); if (!err) - err = nfs_delegation_claim_locks(ctx, state); + err = nfs_delegation_claim_locks(ctx, state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d54d4fca6793..88573d107502 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 57b5fb778f16..bdf3d07ce974 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1379,19 +1379,8 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err) { - struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_opendata *opendata; - int err; - - opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_DELEG_CUR_FH); - if (IS_ERR(opendata)) - return PTR_ERR(opendata); - nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - err = nfs4_open_recover(opendata, state); - nfs4_opendata_put(opendata); switch (err) { default: printk(KERN_ERR "NFS: %s: unhandled error " @@ -1418,20 +1407,40 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - case -ENOMEM: return 0; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: set_bit(NFS_DELEGATED_STATE, &state->flags); ssleep(1); return -EAGAIN; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + return 0; } return err; } +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_opendata *opendata; + int err; + + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_DELEG_CUR_FH); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); + nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); + err = nfs4_open_recover(opendata, state); + nfs4_opendata_put(opendata); + return nfs4_handle_delegation_recall_error(server, state, stateid, err); +} + static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; @@ -5127,54 +5136,16 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return status; } -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); int err; err = nfs4_set_lock_state(state, fl); if (err != 0) - goto out; + return err; err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - switch (err) { - default: - printk(KERN_ERR "NFS: %s: unhandled error " - "%d.\n", __func__, err); - case 0: - case -ESTALE: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: - nfs4_schedule_lease_recovery(server->nfs_client); - return -EAGAIN; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - return -EAGAIN; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - nfs4_schedule_stateid_recovery(server, state); - return 0; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); - ssleep(1); - return -EAGAIN; - case -ENOMEM: - case -NFS4ERR_DENIED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - return 0; - } -out: - return err; + return nfs4_handle_delegation_recall_error(server, state, stateid, err); } struct nfs_release_lockowner_data { From b757144fd77cf5512f5b60179ba5ca8dcc5184b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Apr 2013 14:33:49 -0400 Subject: [PATCH 43/67] NFSv4: Be less aggressive about returning delegations for open files Currently, if the application that holds the file open isn't doing I/O, we may end up returning the delegation. This means that we can no longer cache the file as aggressively, and often also that we multiply the state that both the server and the client needs to track. This patch adds a check for open files to the routine that scans for delegations that are unreferenced. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 30 +++++++++++++++++++++++++++--- fs/nfs/delegation.h | 1 + 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fc8a213497a1..a377ea36381e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -389,6 +389,24 @@ out: return err; } +static bool nfs_delegation_need_return(struct nfs_delegation *delegation) +{ + bool ret = false; + + if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) + ret = true; + if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + struct inode *inode; + + spin_lock(&delegation->lock); + inode = delegation->inode; + if (inode && list_empty(&NFS_I(inode)->open_files)) + ret = true; + spin_unlock(&delegation->lock); + } + return ret; +} + /** * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process @@ -411,8 +429,7 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (!test_and_clear_bit(NFS_DELEGATION_RETURN, - &delegation->flags)) + if (!nfs_delegation_need_return(delegation)) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -471,6 +488,13 @@ int nfs4_inode_return_delegation(struct inode *inode) return err; } +static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); +} + static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { @@ -574,7 +598,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 88573d107502..22f0a138e551 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -28,6 +28,7 @@ struct nfs_delegation { enum { NFS_DELEGATION_NEED_RECLAIM = 0, NFS_DELEGATION_RETURN, + NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, }; From 5c31e2368f39aee062cb697d4b8857c64c7cef7c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Apr 2013 19:04:58 -0400 Subject: [PATCH 44/67] NFSv4: Fix nfs_server_return_all_delegations If the state manager thread is already running, we may end up racing with it in nfs_client_return_marked_delegations. Better to just allow the state manager thread to do the job. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a377ea36381e..213f1bbeb828 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -502,6 +502,18 @@ static void nfs_mark_return_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } +static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + bool ret = false; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + nfs_mark_return_delegation(server, delegation); + ret = true; + } + return ret; +} + /** * nfs_super_return_all_delegations - return delegations for one superblock * @sb: sb to process @@ -510,21 +522,19 @@ static void nfs_mark_return_delegation(struct nfs_server *server, void nfs_server_return_all_delegations(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_delegation *delegation; + bool need_wait; if (clp == NULL) return; rcu_read_lock(); - list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - spin_lock(&delegation->lock); - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - spin_unlock(&delegation->lock); - } + need_wait = nfs_server_mark_return_all_delegations(server); rcu_read_unlock(); - if (nfs_client_return_marked_delegations(clp) != 0) + if (need_wait) { nfs4_schedule_state_manager(clp); + nfs4_wait_clnt_recover(clp); + } } static void nfs_mark_return_all_delegation_types(struct nfs_server *server, From b02ba0b66095d2fdcc6b74538aa5a0ae13976745 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Apr 2013 19:23:58 -0400 Subject: [PATCH 45/67] NFSv4: Clean up nfs_expire_all_delegations Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 213f1bbeb828..60e2e6e3e323 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -514,6 +514,33 @@ static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) return ret; } +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_server_mark_return_all_delegations(server); + rcu_read_unlock(); +} + +static void nfs_delegation_run_state_manager(struct nfs_client *clp) +{ + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); +} + +/** + * nfs_expire_all_delegations + * @clp: client to process + * + */ +void nfs_expire_all_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_all_delegations(clp); + nfs_delegation_run_state_manager(clp); +} + /** * nfs_super_return_all_delegations - return delegations for one superblock * @sb: sb to process @@ -561,12 +588,6 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, rcu_read_unlock(); } -static void nfs_delegation_run_state_manager(struct nfs_client *clp) -{ - if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) - nfs4_schedule_state_manager(clp); -} - void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; @@ -591,16 +612,6 @@ void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) nfs_delegation_run_state_manager(clp); } -/** - * nfs_expire_all_delegations - * @clp: client to process - * - */ -void nfs_expire_all_delegations(struct nfs_client *clp) -{ - nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); -} - static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; From 826e0013082a86fb16f2e414314a5268f744fb96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Apr 2013 19:27:52 -0400 Subject: [PATCH 46/67] NFSv4: Fix CB_RECALL_ANY to only return delegations that are not in use Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/delegation.c | 14 +++++++------- fs/nfs/delegation.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2960512792c2..a13d26ede254 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -500,7 +500,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, &args->craa_type_mask)) pnfs_recall_all_layouts(cps->clp); if (flags) - nfs_expire_all_delegation_types(cps->clp, flags); + nfs_expire_unused_delegation_types(cps->clp, flags); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 60e2e6e3e323..f30bd1ec63b9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -564,7 +564,7 @@ void nfs_server_return_all_delegations(struct nfs_server *server) } } -static void nfs_mark_return_all_delegation_types(struct nfs_server *server, +static void nfs_mark_return_unused_delegation_types(struct nfs_server *server, fmode_t flags) { struct nfs_delegation *delegation; @@ -573,18 +573,18 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } -static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, +static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { struct nfs_server *server; rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - nfs_mark_return_all_delegation_types(server, flags); + nfs_mark_return_unused_delegation_types(server, flags); rcu_read_unlock(); } @@ -601,14 +601,14 @@ void nfs_remove_bad_delegation(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** - * nfs_expire_all_delegation_types + * nfs_expire_unused_delegation_types * @clp: client to process * @flags: delegation types to expire * */ -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { - nfs_client_mark_return_all_delegation_types(clp, flags); + nfs_client_mark_return_unused_delegation_types(clp, flags); nfs_delegation_run_state_manager(clp); } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 22f0a138e551..9a79c7a99d6d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); From bc7a05ca5156915a5aada26d64ee035fdd5e5d25 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Apr 2013 17:50:28 -0400 Subject: [PATCH 47/67] NFSv4: Handle timeouts correctly when probing for lease validity When we send a RENEW or SEQUENCE operation in order to probe if the lease is still valid, we want it to be able to time out since the lease we are probing is likely to time out too. Currently, because we use soft mount semantics for these RPC calls, the return value is EIO, which causes the state manager to exit with an "unhandled error" message. This patch changes the call semantics, so that the RPC layer returns ETIMEDOUT instead of EIO. We then have the state manager default to a simple retry instead of exiting. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4state.c | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bdf3d07ce974..264ddb16255f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3797,7 +3797,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; data->client = clp; data->timestamp = jiffies; - return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, &nfs4_renew_ops, data); } @@ -3811,7 +3811,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) unsigned long now = jiffies; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status < 0) return status; do_renew_lease(clp, now); @@ -5959,7 +5959,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; if (!atomic_inc_not_zero(&clp->cl_count)) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b924bdd69494..1eb17285c99a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1739,6 +1739,10 @@ static int nfs4_check_lease(struct nfs_client *clp) } status = ops->renew_lease(clp, cred); put_rpccred(cred); + if (status == -ETIMEDOUT) { + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + return 0; + } out: return nfs4_recovery_handle_error(clp, status); } From 577b42327d707fbe7166aad6902c2eeee6a65015 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Apr 2013 21:38:12 -0400 Subject: [PATCH 48/67] NFS: Add functionality to allow waiting on all outstanding reads to complete This will later allow NFS locking code to wait for readahead to complete before releasing byte range locks. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/internal.h | 7 ++++++ fs/nfs/pagelist.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 7 ++++++ 4 files changed, 66 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 55b840f05ab2..c1c7a9d78722 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -561,6 +561,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); + nfs_iocounter_init(&l_ctx->io_count); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 541c9ebdbc5a..91e59a39fc08 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -229,6 +229,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); +int nfs_iocounter_wait(struct nfs_io_counter *c); + +static inline void nfs_iocounter_init(struct nfs_io_counter *c) +{ + c->flags = 0; + atomic_set(&c->io_count, 0); +} /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7f0933086b36..29cfb7ade121 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -84,6 +84,55 @@ nfs_page_free(struct nfs_page *p) kmem_cache_free(nfs_page_cachep, p); } +static void +nfs_iocounter_inc(struct nfs_io_counter *c) +{ + atomic_inc(&c->io_count); +} + +static void +nfs_iocounter_dec(struct nfs_io_counter *c) +{ + if (atomic_dec_and_test(&c->io_count)) { + clear_bit(NFS_IO_INPROGRESS, &c->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&c->flags, NFS_IO_INPROGRESS); + } +} + +static int +__nfs_iocounter_wait(struct nfs_io_counter *c) +{ + wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); + DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); + int ret = 0; + + do { + prepare_to_wait(wq, &q.wait, TASK_KILLABLE); + set_bit(NFS_IO_INPROGRESS, &c->flags); + if (atomic_read(&c->io_count) == 0) + break; + ret = nfs_wait_bit_killable(&c->flags); + } while (atomic_read(&c->io_count) != 0); + finish_wait(wq, &q.wait); + return ret; +} + +/** + * nfs_iocounter_wait - wait for i/o to complete + * @c: nfs_io_counter to use + * + * returns -ERESTARTSYS if interrupted by a fatal signal. + * Otherwise returns 0 once the io_count hits 0. + */ +int +nfs_iocounter_wait(struct nfs_io_counter *c) +{ + if (atomic_read(&c->io_count) == 0) + return 0; + return __nfs_iocounter_wait(c); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use @@ -118,6 +167,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_CAST(l_ctx); } req->wb_lock_context = l_ctx; + nfs_iocounter_inc(&l_ctx->io_count); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in @@ -177,6 +227,7 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { + nfs_iocounter_dec(&l_ctx->io_count); nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6b1956f3c86..fc01d5cb4cf1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -59,11 +59,18 @@ struct nfs_lockowner { pid_t l_pid; }; +#define NFS_IO_INPROGRESS 0 +struct nfs_io_counter { + unsigned long flags; + atomic_t io_count; +}; + struct nfs_lock_context { atomic_t count; struct list_head list; struct nfs_open_context *open_context; struct nfs_lockowner lockowner; + struct nfs_io_counter io_count; }; struct nfs4_state; From 7a8203d8cbab63b7c556ebd00e12685a3ed176b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Apr 2013 21:49:53 -0400 Subject: [PATCH 49/67] NFS: Ensure that NFS file unlock waits for readahead to complete Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 29f4a48a0ee6..a87a44f84113 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -744,6 +744,7 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; + struct nfs_lock_context *l_ctx; int status; /* @@ -752,6 +753,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); + l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); + if (!IS_ERR(l_ctx)) { + status = nfs_iocounter_wait(&l_ctx->io_count); + nfs_put_lock_context(l_ctx); + if (status < 0) + return status; + } + /* NOTE: special case * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. From 314d7cc05d10208d6e63b0b1185f02c73a16adfb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Apr 2013 15:36:48 -0400 Subject: [PATCH 50/67] nfs: remove unnecessary check for NULL inode->i_flock from nfs_delegation_claim_locks The second check was added in commit 65b62a29 but it will never be true. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index f30bd1ec63b9..57db3244f4d9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -70,11 +70,9 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ struct file_lock *fl; int status = 0; - if (inode->i_flock == NULL) - return 0; - if (inode->i_flock == NULL) goto out; + /* Protect inode->i_flock using the file locks lock */ lock_flocks(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { From b9536ad52152ff440231dcc2dd9cfa644f5413c9 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 11 Apr 2013 09:28:45 -0400 Subject: [PATCH 51/67] NFSv4 release the sequence id in the return on close case Otherwise we deadlock if state recovery is initiated while we sleep. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 264ddb16255f..83197fb60d5f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2275,8 +2275,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && - pnfs_roc_drain(inode, &calldata->roc_barrier, task)) + pnfs_roc_drain(inode, &calldata->roc_barrier, task)) { + nfs_release_seqid(calldata->arg.seqid); goto out_wait; + } } nfs_fattr_init(calldata->res.fattr); From b570a975ed276335dc7d148658c1f880ac0a507f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 12 Apr 2013 15:04:51 -0400 Subject: [PATCH 52/67] NFSv4: Fix handling of revoked delegations by setattr Currently, _nfs4_do_setattr() will use the delegation stateid if no writeable open file stateid is available. If the server revokes that delegation stateid, then the call to nfs4_handle_exception() will fail to handle the error due to the lack of a struct nfs4_state, and will just convert the error into an EIO. This patch just removes the requirement that we must have a struct nfs4_state in order to invalidate the delegation and retry. Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 83197fb60d5f..af05df3c7c79 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -304,9 +304,13 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { + nfs_remove_bad_delegation(inode); + exception->retry = 1; + break; + } if (state == NULL) break; - nfs_remove_bad_delegation(state->inode); ret = nfs4_schedule_stateid_recovery(server, state); if (ret < 0) break; From ba60eb25ff6be6f8e60488cdfd454e5c612bce60 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 10:49:37 -0400 Subject: [PATCH 53/67] SUNRPC: Fix a livelock problem in the xprt->backlog queue This patch ensures that we throttle new RPC requests if there are requests already waiting in the xprt->backlog queue. The reason for doing this is to fix livelock issues that can occur when an existing (high priority) task is waiting in the backlog queue, gets woken up by xprt_free_slot(), but a new task then steals the slot. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/clnt.c | 17 ++++++++++- net/sunrpc/xprt.c | 61 +++++++++++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 30834be03011..12815f66a9c4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -279,6 +279,7 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); @@ -334,6 +335,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable); #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) +#define XPRT_CONGESTED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b95a0a2d5eea..a80ee9b80dcf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1306,6 +1306,8 @@ call_reserve(struct rpc_task *task) xprt_reserve(task); } +static void call_retry_reserve(struct rpc_task *task); + /* * 1b. Grok the result of xprt_reserve() */ @@ -1347,7 +1349,7 @@ call_reserveresult(struct rpc_task *task) case -ENOMEM: rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ - task->tk_action = call_reserve; + task->tk_action = call_retry_reserve; return; case -EIO: /* probably a shutdown */ break; @@ -1359,6 +1361,19 @@ call_reserveresult(struct rpc_task *task) rpc_exit(task, status); } +/* + * 1c. Retry reserving an RPC call slot + */ +static void +call_retry_reserve(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_reserveresult; + xprt_retry_reserve(task); +} + /* * 2. Bind and/or refresh the credentials */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b7478d5e7ffd..745fca3cfd36 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +{ + set_bit(XPRT_CONGESTED, &xprt->state); + rpc_sleep_on(&xprt->backlog, task, NULL); +} + +static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +{ + if (rpc_wake_up_next(&xprt->backlog) == NULL) + clear_bit(XPRT_CONGESTED, &xprt->state); +} + +static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) +{ + bool ret = false; + + if (!test_bit(XPRT_CONGESTED, &xprt->state)) + goto out; + spin_lock(&xprt->reserve_lock); + if (test_bit(XPRT_CONGESTED, &xprt->state)) { + rpc_sleep_on(&xprt->backlog, task, NULL); + ret = true; + } + spin_unlock(&xprt->reserve_lock); +out: + return ret; +} + static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); @@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) task->tk_status = -ENOMEM; break; case -EAGAIN: - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); dprintk("RPC: waiting for request slot\n"); default: task->tk_status = -EAGAIN; @@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - rpc_wake_up_next(&xprt->backlog); + xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } @@ -1092,13 +1120,40 @@ EXPORT_SYMBOL_GPL(xprt_free); * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation * - * If no more slots are available, place the task on the transport's + * If the transport is marked as being congested, or if no more + * slots are available, place the task on the transport's * backlog queue. */ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (!xprt_throttle_congested(xprt, task)) + xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); +} + +/** + * xprt_retry_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + * Note that the only difference with xprt_reserve is that we now + * ignore the value of the XPRT_CONGESTED flag. + */ +void xprt_retry_reserve(struct rpc_task *task) +{ + struct rpc_xprt *xprt; + task->tk_status = 0; if (task->tk_rqstp != NULL) return; From b7993cebb841b0da7a33e9d5ce301a9fd3209165 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 11:42:00 -0400 Subject: [PATCH 54/67] SUNRPC: Allow rpc_create() to request that TCP slots be unlimited This is mainly for use by NFSv4.1, where the session negotiation ultimately wants to decide how many RPC slots we can fill. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/clnt.c | 2 ++ net/sunrpc/xprtsock.c | 6 +++++- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2cf4ffaa3cd4..e7d492ce7c18 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) #define RPC_CLNT_CREATE_QUIET (1UL << 6) +#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 12815f66a9c4..ff5392421cb2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -255,6 +255,8 @@ static inline int bc_prealloc(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +#define XPRT_CREATE_INFINITE_SLOTS (1U) + struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ struct net * net; @@ -263,6 +265,7 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + unsigned int flags; }; struct xprt_class { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a80ee9b80dcf..651245aa829a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) + xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; /* * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3081620cb02c..726e702b7a29 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2762,9 +2762,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, - xprt_max_tcp_slot_table_entries); + max_slot_table_size); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); From 98f98cf571b7b1f34683455a61dae9f35e7222a1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 11:49:51 -0400 Subject: [PATCH 55/67] NFSv4.1: Set the RPC_CLNT_CREATE_INFINITE_SLOTS flag for NFSv4.1 transports This ensures that the RPC layer doesn't override the NFS session negotiation. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4client.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 5 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 84d8eae203a7..c513b0cc835f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -593,6 +593,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_DISCRTRY; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (!IS_ERR(clp->cl_rpcclient)) return 0; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 17b34b2da2df..f4d4d4ec6bf7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -198,6 +198,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; + if (clp->cl_minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, authflavour); if (error < 0) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d8fdfdc7a8fe..3b7fa2abecca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -40,6 +40,7 @@ struct nfs_client { #define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ #define NFS_CS_MIGRATION 2 /* - transparent state migr */ +#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ From 549b19cc9f31e8fdda317625d564bac0052a3328 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Apr 2013 18:42:34 -0400 Subject: [PATCH 56/67] NFSv4: Record the OPEN create mode used in the nfs4_opendata structure If we're doing NFSv4.1 against a server that has persistent sessions, then we should not need to call SETATTR in order to reset the file attributes immediately after doing an exclusive create. Note that since the create mode depends on the type of session that has been negotiated with the server, we should not choose the mode until after we've got a session slot. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 ++++++++++++++-- fs/nfs/nfs4xdr.c | 37 ++++++++++++++++--------------------- include/linux/nfs_xdr.h | 1 + 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af05df3c7c79..282d9fa6994a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1525,6 +1525,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; + struct nfs_client *clp = sp->so_server->nfs_client; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) goto out_wait; @@ -1545,7 +1546,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update client id. */ - data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; + data->o_arg.clientid = clp->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; @@ -1557,6 +1558,16 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) &data->o_res.seq_res, task) != 0) nfs_release_seqid(data->o_arg.seqid); + + /* Set the create mode (note dependency on the session type) */ + data->o_arg.createmode = NFS4_CREATE_UNCHECKED; + if (data->o_arg.open_flags & O_EXCL) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_GUARDED; + else if (clp->cl_mvops->minor_version > 0) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + } return; unlock_no_action: rcu_read_unlock(); @@ -2000,7 +2011,8 @@ static int _nfs4_do_open(struct inode *dir, if (status != 0) goto err_opendata_put; - if (opendata->o_arg.open_flags & O_EXCL) { + if ((opendata->o_arg.open_flags & O_EXCL) && + (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); nfs_fattr_init(opendata->o_res.f_attr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b744895b9e1..fef71cbec501 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1366,33 +1366,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) { + struct iattr dummy; __be32 *p; - struct nfs_client *clp; p = reserve_space(xdr, 4); - switch(arg->open_flags & O_EXCL) { - case 0: + switch(arg->createmode) { + case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); encode_attrs(xdr, arg->u.attrs, arg->server); break; - default: - clp = arg->server->nfs_client; - if (clp->cl_mvops->minor_version > 0) { - if (nfs4_has_persistent_session(clp)) { - *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->server); - } else { - struct iattr dummy; - - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); - encode_nfs4_verifier(xdr, &arg->u.verifier); - dummy.ia_valid = 0; - encode_attrs(xdr, &dummy, arg->server); - } - } else { - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); - } + case NFS4_CREATE_GUARDED: + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + case NFS4_CREATE_EXCLUSIVE: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->server); } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 90a4aa190b43..bdc100f66dfb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -349,6 +349,7 @@ struct nfs_openargs { const u32 * bitmask; const u32 * open_bitmap; __u32 claim; + enum createmode4 createmode; }; struct nfs_openres { From 042ad0b398ea4e937e38324dd096bdf9d2c495f7 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 19 Apr 2013 16:09:37 -0400 Subject: [PATCH 57/67] nfs: Send atime and mtime as a 64bit value RFC 3530 says that the seconds value of a nfstime4 structure is a 64bit value, but we are instead sending a 32-bit 0 and then a 32bit conversion of the 64bit Linux value. This means that if we try to set atime to a value before the epoch (touch -t 196001010101) the client will only send part of the new value due to lost precision. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fef71cbec501..c2cbf0d90a31 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1058,8 +1058,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_ATIME_SET) { bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_atime.tv_sec); + p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); } else if (iap->ia_valid & ATTR_ATIME) { @@ -1069,8 +1068,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_MTIME_SET) { bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); + p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); } else if (iap->ia_valid & ATTR_MTIME) { From 92b40e93849e29f9ca661de6442bb66282738bf7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Apr 2013 01:25:45 -0400 Subject: [PATCH 58/67] NFSv4: Use the open stateid if the delegation has the wrong mode Fix nfs4_select_rw_stateid() so that it chooses the open stateid (or an all-zero stateid) if the delegation does not match the selected read/write mode. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4state.c | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7ef19ce6d832..553a83cc4106 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -144,6 +144,7 @@ struct nfs4_lock_state { enum { LK_STATE_IN_USE, NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_OPEN_STATE, /* OPEN stateid is set */ NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 282d9fa6994a..39849360f9f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -978,6 +978,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + set_bit(NFS_OPEN_STATE, &state->flags); switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1eb17285c99a..b7796950eceb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1024,12 +1024,16 @@ out: static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + const nfs4_stateid *src; int ret; int seq; do { + src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - nfs4_stateid_copy(dst, &state->stateid); + if (test_bit(NFS_OPEN_STATE, &state->flags)) + src = &state->open_stateid; + nfs4_stateid_copy(dst, src); ret = 0; smp_rmb(); if (!list_empty(&state->owner->so_seqid.list)) From 8e472f33b5ba742f9bfcf16f0328f5a7842b32d1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Apr 2013 01:30:53 -0400 Subject: [PATCH 59/67] NFSv4: Ensure the LOCK call cannot use the delegation stateid Defensive patch to ensure that we copy the state->open_stateid, which can never be set to the delegation stateid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39849360f9f6..ed01a66dc391 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4793,7 +4793,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } - data->arg.open_stateid = &state->stateid; + data->arg.open_stateid = &state->open_stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; } else From 1dfd89af8697a299e7982ae740d4695ecd917eef Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 21 Apr 2013 18:01:06 -0400 Subject: [PATCH 60/67] LOCKD: Ensure that nlmclnt_block resets block->b_status after a server reboot After a server reboot, the reclaimer thread will recover all the existing locks. For locks that are blocked, however, it will change the value of block->b_status to nlm_lck_denied_grace_period in order to signal that they need to wake up and resend the original blocking lock request. Due to a bug, however, the block->b_status never gets reset after the blocked locks have been woken up, and so the process goes into an infinite loop of resends until the blocked lock is satisfied. Reported-by: Marc Eshel Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/lockd/clntlock.c | 3 +++ fs/lockd/clntproc.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0796c45d0d4d..01bfe7662751 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -144,6 +144,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; + /* Reset the lock status after a server reboot so we resend */ + if (block->b_status == nlm_lck_denied_grace_period) + block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 7e529c3c45c0..9760ecb9b60f 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -550,9 +550,6 @@ again: status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - /* Resend the blocking lock request after a server reboot */ - if (resp->status == nlm_lck_denied_grace_period) - continue; if (resp->status != nlm_lck_blocked) break; } From fd068b200fb86e8fa52368e6f6088d2ab297a5d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Apr 2013 11:29:51 -0400 Subject: [PATCH 61/67] NFSv4: Ensure that we clear the NFS_OPEN_STATE flag when appropriate We should always clear it before initiating file recovery. Also ensure that we clear it after a CLOSE and/or after TEST_STATEID fails. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ed01a66dc391..c13144911d20 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1290,6 +1290,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * /* memory barrier prior to reading state->n_* */ clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { clear_bit(NFS_O_RDWR_STATE, &state->flags); @@ -1893,6 +1894,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); } return status; } @@ -2208,11 +2210,19 @@ static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, fmode_t fmode) { spin_lock(&state->owner->so_lock); - if (!(fmode & FMODE_READ)) - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - if (!(fmode & FMODE_WRITE)) - clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); + switch (fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_WRITE: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_READ: + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case 0: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); + } spin_unlock(&state->owner->so_lock); } From 79d852bf5e7691dc78cc6322ecd1860c50940785 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Apr 2013 15:42:48 -0400 Subject: [PATCH 62/67] NFS: Retry SETCLIENTID with AUTH_SYS instead of AUTH_NONE Recently I changed the SETCLIENTID code to use AUTH_GSS(krb5i), and then retry with AUTH_NONE if that didn't work. This was to enable Kerberos NFS mounts to work without forcing Linux NFS clients to have a keytab on hand. Rick Macklem reports that the FreeBSD server accepts AUTH_NONE only for NULL operations (thus certainly not for SETCLIENTID). Falling back to AUTH_NONE means our proposed 3.10 NFS client will not interoperate with FreeBSD servers over NFSv4 unless Kerberos is fully configured on both ends. If the Linux client falls back to using AUTH_SYS instead for SETCLIENTID, all should work fine as long as the NFS server is configured to allow AUTH_SYS for SETCLIENTID. This may still prevent access to Kerberos-only FreeBSD servers by Linux clients with no keytab. Rick is of the opinion that the security settings the server applies to its pseudo-fs should also apply to the SETCLIENTID operation. Linux and Solaris NFS servers do not place that limitation on SETCLIENTID. The security settings for the server's pseudo-fs are determined automatically as the union of security flavors allowed on real exports, as recommended by RFC 3530bis; and the flavors allowed for SETCLIENTID are all flavors supported by the respective server implementation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c54600962bc6..a75e712490f4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1884,7 +1884,7 @@ again: break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_NULL); + clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); break; From 8188df1733d6722336e287cc859b2567ad70576a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Apr 2013 14:31:19 -0400 Subject: [PATCH 63/67] NFSv4.1: Use the more efficient open_noattr call for open-by-filehandle When we're doing open-by-filehandle in NFSv4.1, we shouldn't need to do the cache consistency revalidation on the directory. It is therefore more efficient to just use open_noattr, which returns the file attributes, but not the directory attributes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c13144911d20..a411a53d8f4a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1549,9 +1549,13 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) } /* Update client id. */ data->o_arg.clientid = clp->cl_clientid; - if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { - task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + switch (data->o_arg.claim) { + case NFS4_OPEN_CLAIM_PREVIOUS: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; + case NFS4_OPEN_CLAIM_FH: + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; From cd4c9be2c61d4d974d348743fddb8183b8185abc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Apr 2013 14:46:25 -0400 Subject: [PATCH 64/67] NFSv4.1: Don't do a delegated open for NFS4_OPEN_CLAIM_DELEG_CUR_FH modes If we're in a delegation recall situation, we can't do a delegated open. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a411a53d8f4a..ea50807b65d9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1543,6 +1543,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR && + data->o_arg.claim != NFS4_OPEN_CLAIM_DELEG_CUR_FH && can_open_delegated(delegation, data->o_arg.fmode)) goto unlock_no_action; rcu_read_unlock(); From bdeca1b76cd56cd10a029f0ad2fd9ab6dd7e313d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Apr 2013 14:52:44 -0400 Subject: [PATCH 65/67] NFSv4: Don't recheck permissions on open in case of recovery cached open If we already checked the user access permissions on the original open, then don't bother checking again on recovery. Doing so can cause a deadlock with NFSv4.1, since the may_open() operation is not privileged. Furthermore, we can't report an access permission failure here anyway. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea50807b65d9..e18b3b46c001 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -769,6 +769,7 @@ struct nfs4_opendata { struct iattr attrs; unsigned long timestamp; unsigned int rpc_done : 1; + unsigned int is_recover : 1; int rpc_status; int cancelled; }; @@ -1101,9 +1102,11 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); - ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); - if (ret != 0) - goto out; + if (!opendata->is_recover) { + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); + if (ret != 0) + goto out; + } ret = -EAGAIN; /* Try to update the stateid using the delegation */ @@ -1670,8 +1673,11 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->rpc_done = 0; data->rpc_status = 0; data->cancelled = 0; - if (isrecover) + data->is_recover = 0; + if (isrecover) { nfs4_set_sequence_privileged(&o_arg->seq_args); + data->is_recover = 1; + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); From ee3ae84ef40154c734dc2cbca5cf4b2f54c6c7c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Apr 2013 10:35:36 -0400 Subject: [PATCH 66/67] NFSv4: Servers should only check SETATTR stateid open mode on size change The NFSv4 and NFSv4.1 specs are both clear that the server should only check stateid open mode if a SETATTR specifies the size attribute. If the open mode is not one that allows writing, then it returns NFS4ERR_OPENMODE. In the case where the SETATTR is not changing the size, the client will still pass it the delegation stateid to ensure that the server does not recall that delegation. In that case, the server should _ignore_ the delegation open mode, and simply apply standard permission checks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3bc847ce4838..982b4527551d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2142,20 +2142,25 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, .rpc_cred = cred, }; unsigned long timestamp = jiffies; + fmode_t fmode; + bool truncate; int status; nfs_fattr_init(fattr); - if (state != NULL && nfs4_valid_open_stateid(state)) { + /* Servers should only apply open mode checks for file size changes */ + truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; + fmode = truncate ? FMODE_WRITE : FMODE_READ; + + if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { + /* Use that stateid */ + } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, &lockowner); - } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, - FMODE_WRITE)) { - /* Use that stateid */ } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); From 721ccfb79b6f74f4052de70236d24047e73682d4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Apr 2013 11:11:58 -0400 Subject: [PATCH 67/67] NFSv4: Warn once about servers that incorrectly apply open mode to setattr Debugging aid to help identify servers that incorrectly apply open mode checks to setattr requests that are not changing the file size. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 982b4527551d..9da4bd55eb30 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2184,6 +2184,13 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); switch (err) { case -NFS4ERR_OPENMODE: + if (!(sattr->ia_valid & ATTR_SIZE)) { + pr_warn_once("NFSv4: server %s is incorrectly " + "applying open mode checks to " + "a SETATTR that is not " + "changing file size.\n", + server->nfs_client->cl_hostname); + } if (state && !(state->state & FMODE_WRITE)) { err = -EBADF; if (sattr->ia_valid & ATTR_OPEN)