diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt index 95c13aa575ff..906b6c233f62 100644 --- a/Documentation/filesystems/nfs/nfs-rdma.txt +++ b/Documentation/filesystems/nfs/nfs-rdma.txt @@ -138,9 +138,9 @@ Installation - Build, install, reboot The NFS/RDMA code will be enabled automatically if NFS and RDMA - are turned on. The NFS/RDMA client and server are configured via the - SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both - depend on SUNRPC and INFINIBAND. The default value of both options will be: + are turned on. The NFS/RDMA client and server are configured via the hidden + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The + value of SUNRPC_XPRT_RDMA will be: - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client and server will not be built @@ -238,9 +238,8 @@ NFS/RDMA Setup - Start the NFS server - If the NFS/RDMA server was built as a module - (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA - transport module: + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in + kernel config), load the RDMA transport module: $ modprobe svcrdma @@ -259,9 +258,8 @@ NFS/RDMA Setup - On the client system - If the NFS/RDMA client was built as a module - (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client - module: + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in + kernel config), load the RDMA client module: $ modprobe xprtrdma.ko diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 55505cbe11af..d678bcc3cbcb 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -322,6 +322,11 @@ out_rqst: return error; } +static struct svc_serv_ops lockd_sv_ops = { + .svo_shutdown = svc_rpcb_cleanup, + .svo_enqueue_xprt = svc_xprt_do_enqueue, +}; + static struct svc_serv *lockd_create_svc(void) { struct svc_serv *serv; @@ -350,7 +355,7 @@ static struct svc_serv *lockd_create_svc(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return ERR_PTR(-ENOMEM); @@ -586,6 +591,7 @@ static int lockd_init_net(struct net *net) INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); + ln->lockd_manager.block_opens = false; spin_lock_init(&ln->nsm_clnt_lock); return 0; } diff --git a/fs/locks.c b/fs/locks.c index d3d558ba4da7..2a54c800a223 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1568,6 +1568,7 @@ int fcntl_getlease(struct file *filp) * desired lease. * @dentry: dentry to check * @arg: type of lease that we're trying to acquire + * @flags: current lock flags * * Check to see if there's an existing open fd on this file that would * conflict with the lease we're trying to set. diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 682529c00996..2c4a0b565d28 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -308,6 +308,10 @@ err_bind: return ret; } +static struct svc_serv_ops nfs_cb_sv_ops = { + .svo_enqueue_xprt = svc_xprt_do_enqueue, +}; + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -333,7 +337,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index ae6e58ea4de5..fd8c9a5bcac4 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -63,14 +63,33 @@ EXPORT_SYMBOL_GPL(locks_end_grace); * lock reclaims. */ int -locks_in_grace(struct net *net) +__state_in_grace(struct net *net, bool open) { struct list_head *grace_list = net_generic(net, grace_net_id); + struct lock_manager *lm; - return !list_empty(grace_list); + if (!open) + return !list_empty(grace_list); + + list_for_each_entry(lm, grace_list, list) { + if (lm->block_opens) + return true; + } + return false; +} + +int locks_in_grace(struct net *net) +{ + return __state_in_grace(net, 0); } EXPORT_SYMBOL_GPL(locks_in_grace); +int opens_in_grace(struct net *net) +{ + return __state_in_grace(net, 1); +} +EXPORT_SYMBOL_GPL(opens_in_grace); + static int __net_init grace_init_net(struct net *net) { diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index f79521a59747..b4d84b579f20 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1075,73 +1075,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) return rv; } -/* Iterator */ - -static void *e_start(struct seq_file *m, loff_t *pos) - __acquires(((struct cache_detail *)m->private)->hash_lock) -{ - loff_t n = *pos; - unsigned hash, export; - struct cache_head *ch; - struct cache_detail *cd = m->private; - struct cache_head **export_table = cd->hash_table; - - read_lock(&cd->hash_lock); - if (!n--) - return SEQ_START_TOKEN; - hash = n >> 32; - export = n & ((1LL<<32) - 1); - - - for (ch=export_table[hash]; ch; ch=ch->next) - if (!export--) - return ch; - n &= ~((1LL<<32) - 1); - do { - hash++; - n += 1LL<<32; - } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL); - if (hash >= EXPORT_HASHMAX) - return NULL; - *pos = n+1; - return export_table[hash]; -} - -static void *e_next(struct seq_file *m, void *p, loff_t *pos) -{ - struct cache_head *ch = p; - int hash = (*pos >> 32); - struct cache_detail *cd = m->private; - struct cache_head **export_table = cd->hash_table; - - if (p == SEQ_START_TOKEN) - hash = 0; - else if (ch->next == NULL) { - hash++; - *pos += 1LL<<32; - } else { - ++*pos; - return ch->next; - } - *pos &= ~((1LL<<32) - 1); - while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) { - hash++; - *pos += 1LL<<32; - } - if (hash >= EXPORT_HASHMAX) - return NULL; - ++*pos; - return export_table[hash]; -} - -static void e_stop(struct seq_file *m, void *p) - __releases(((struct cache_detail *)m->private)->hash_lock) -{ - struct cache_detail *cd = m->private; - - read_unlock(&cd->hash_lock); -} - static struct flags { int flag; char *name[2]; @@ -1270,9 +1203,9 @@ static int e_show(struct seq_file *m, void *p) } const struct seq_operations nfs_exports_op = { - .start = e_start, - .next = e_next, - .stop = e_stop, + .start = cache_seq_start, + .next = cache_seq_next, + .stop = cache_seq_stop, .show = e_show, }; diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 1f52bfcc436f..2e315072bf3f 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -6,6 +6,7 @@ #include #include +#include struct knfsd_fh; struct svc_fh; diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index a3f34900091f..23cc85d1efdd 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -37,9 +37,7 @@ #include #include - -/* XXX from linux/nfs_idmap.h */ -#define IDMAP_NAMESZ 128 +#include #ifdef CONFIG_NFSD_V4 int nfsd_idmap_init(struct net *); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ea6749a32760..d8b16c2568f3 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -110,6 +110,7 @@ struct nfsd_net { unsigned int max_connections; u32 clientid_counter; + u32 clverifier_counter; struct svc_serv *nfsd_serv; }; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index d54701f6dc78..1580ea6fd64d 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -44,13 +44,13 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, inode = d_inode(fh->fh_dentry); - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + if (argp->mask & ~NFS_ACL_MASK) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; nfserr = fh_getattr(fh, &resp->stat); if (nfserr) - goto fail; + RETURN_STATUS(nfserr); if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); @@ -202,7 +202,7 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, if (!p) return 0; argp->mask = ntohl(*p++); - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + if (argp->mask & ~NFS_ACL_MASK || !xdr_argsize_check(rqstp, p)) return 0; @@ -293,9 +293,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, resp->acl_default, resp->mask & NFS_DFACL, NFS_ACL_DEFAULT); - if (n <= 0) - return 0; - return 1; + return (n > 0); } static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 882b1a14bc3e..01df4cd7c753 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -41,7 +41,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, inode = d_inode(fh->fh_dentry); - if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + if (argp->mask & ~NFS_ACL_MASK) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; @@ -148,7 +148,7 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, if (!p) return 0; args->mask = ntohl(*p++); - if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + if (args->mask & ~NFS_ACL_MASK || !xdr_argsize_check(rqstp, p)) return 0; diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index eb5accf1b37f..6adabd6049b7 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -34,8 +34,10 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include -#include +#include + #include "nfsfh.h" #include "nfsd.h" #include "acl.h" @@ -100,7 +102,7 @@ deny_mask_from_posix(unsigned short perm, u32 flags) /* We only map from NFSv4 to POSIX ACLs when setting ACLs, when we err on the * side of being more restrictive, so the mode bit mapping below is * pessimistic. An optimistic version would be needed to handle DENY's, - * but we espect to coalesce all ALLOWs and DENYs before mapping to mode + * but we expect to coalesce all ALLOWs and DENYs before mapping to mode * bits. */ static void @@ -458,7 +460,7 @@ init_state(struct posix_acl_state *state, int cnt) state->empty = 1; /* * In the worst case, each individual acl could be for a distinct - * named user or group, but we don't no which, so we allocate + * named user or group, but we don't know which, so we allocate * enough space for either: */ alloc = sizeof(struct posix_ace_state_array) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a49201835a97..e7f50c4081d6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -435,12 +435,12 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, */ status = 0; out: - if (status) - nfsd4_mark_cb_fault(cb->cb_clp, status); + cb->cb_seq_status = status; return status; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + status = -EIO; + goto out; } static int decode_cb_sequence4res(struct xdr_stream *xdr, @@ -451,11 +451,10 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr, if (cb->cb_minorversion == 0) return 0; - status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status); - if (unlikely(status || cb->cb_status)) + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status); + if (unlikely(status || cb->cb_seq_status)) return status; - cb->cb_update_seq_nr = true; return decode_cb_sequence4resok(xdr, cb); } @@ -527,7 +526,7 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, if (cb != NULL) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_status)) + if (unlikely(status || cb->cb_seq_status)) return status; } @@ -617,7 +616,7 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, if (cb) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_status)) + if (unlikely(status || cb->cb_seq_status)) return status; } return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); @@ -876,7 +875,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; - cb->cb_update_seq_nr = false; + /* + * cb_seq_status is only set in decode_cb_sequence4res, + * and so will remain 1 if an rpc level failure occurs. + */ + cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion) { if (!nfsd41_cb_get_slot(clp, task)) @@ -885,6 +888,84 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) rpc_call_start(task); } +static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *session = clp->cl_cb_session; + bool ret = true; + + if (!clp->cl_minorversion) { + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. + */ + if (task->tk_flags & RPC_TASK_KILLED) + goto need_restart; + + return true; + } + + switch (cb->cb_seq_status) { + case 0: + /* + * No need for lock, access serialized in nfsd4_cb_prepare + * + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + ++session->se_cb_seq_nr; + break; + case -ESERVERFAULT: + ++session->se_cb_seq_nr; + case 1: + case -NFS4ERR_BADSESSION: + nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); + ret = false; + break; + case -NFS4ERR_DELAY: + if (!rpc_restart_call(task)) + goto out; + + rpc_delay(task, 2 * HZ); + return false; + case -NFS4ERR_BADSLOT: + goto retry_nowait; + case -NFS4ERR_SEQ_MISORDERED: + if (session->se_cb_seq_nr != 1) { + session->se_cb_seq_nr = 1; + goto retry_nowait; + } + break; + default: + dprintk("%s: unprocessed error %d\n", __func__, + cb->cb_seq_status); + } + + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + dprintk("%s: freed slot, new seqid=%d\n", __func__, + clp->cl_cb_session->se_cb_seq_nr); + + if (task->tk_flags & RPC_TASK_KILLED) + goto need_restart; +out: + return ret; +retry_nowait: + if (rpc_restart_call_prepare(task)) + ret = false; + goto out; +need_restart: + task->tk_status = 0; + cb->cb_need_restart = true; + return false; +} + static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; @@ -893,37 +974,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) dprintk("%s: minorversion=%d\n", __func__, clp->cl_minorversion); - if (clp->cl_minorversion) { - /* - * No need for lock, access serialized in nfsd4_cb_prepare - * - * RFC5661 20.9.3 - * If CB_SEQUENCE returns an error, then the state of the slot - * (sequence ID, cached reply) MUST NOT change. - */ - if (cb->cb_update_seq_nr) - ++clp->cl_cb_session->se_cb_seq_nr; - - clear_bit(0, &clp->cl_cb_slot_busy); - rpc_wake_up_next(&clp->cl_cb_waitq); - dprintk("%s: freed slot, new seqid=%d\n", __func__, - clp->cl_cb_session->se_cb_seq_nr); - } - - /* - * If the backchannel connection was shut down while this - * task was queued, we need to resubmit it after setting up - * a new backchannel connection. - * - * Note that if we lost our callback connection permanently - * the submission code will error out, so we don't need to - * handle that case here. - */ - if (task->tk_flags & RPC_TASK_KILLED) { - task->tk_status = 0; - cb->cb_need_restart = true; + if (!nfsd4_cb_sequence_done(task, cb)) return; - } if (cb->cb_status) { WARN_ON_ONCE(task->tk_status); @@ -1099,8 +1151,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); + cb->cb_seq_status = 1; cb->cb_status = 0; - cb->cb_update_seq_nr = false; cb->cb_need_restart = false; } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e1b3d3d472da..5b20577dcdd2 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -59,9 +59,6 @@ MODULE_PARM_DESC(nfs4_disable_idmapping, * that. */ -#define IDMAP_TYPE_USER 0 -#define IDMAP_TYPE_GROUP 1 - struct ent { struct cache_head h; int type; /* User / Group */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 90cfda75313c..4ce6b97b31ad 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -276,13 +276,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); /* - * Following rfc 3530 14.2.16, use the returned bitmask - * to indicate which attributes we used to store the - * verifier: + * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 + * use the returned bitmask to indicate which attributes + * we used to store the verifier: */ - if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) - open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); + if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) + open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY); } else /* * Note this may exit with the parent still locked. @@ -362,7 +362,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; struct svc_fh *resfh = NULL; - struct nfsd4_compoundres *resp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -389,8 +388,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy_clientid(&open->op_clientid, cstate->session); /* check seqid for replay. set nfs4_owner */ - resp = rqstp->rq_resp; - status = nfsd4_process_open1(&resp->cstate, open, nn); + status = nfsd4_process_open1(cstate, open, nn); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); @@ -417,10 +415,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (opens_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!opens_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -829,7 +827,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (locks_in_grace(SVC_NET(rqstp))) + if (opens_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -848,7 +846,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (locks_in_grace(SVC_NET(rqstp)) && + if (opens_in_grace(SVC_NET(rqstp)) && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, @@ -1364,10 +1362,6 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, goto out; } - nfserr = ops->proc_layoutcommit(inode, lcp); - if (nfserr) - goto out_put_stid; - if (new_size > i_size_read(inode)) { lcp->lc_size_chg = 1; lcp->lc_newsize = new_size; @@ -1375,7 +1369,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, lcp->lc_size_chg = 0; } -out_put_stid: + nfserr = ops->proc_layoutcommit(inode, lcp); nfs4_put_stid(&ls->ls_stid); out: return nfserr; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index d88ea7b9a85c..e3d47091b191 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -272,6 +272,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) .ctx.actor = nfsd4_build_namelist, .names = LIST_HEAD_INIT(ctx.names) }; + struct name_list *entry, *tmp; int status; status = nfs4_save_creds(&original_cred); @@ -286,9 +287,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); - while (!list_empty(&ctx.names)) { - struct name_list *entry; - entry = list_entry(ctx.names.next, struct name_list, list); + + list_for_each_entry_safe(entry, tmp, &ctx.names, list) { if (!status) { struct dentry *dentry; dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); @@ -304,6 +304,12 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) } mutex_unlock(&d_inode(dir)->i_mutex); nfs4_reset_creds(original_cred); + + list_for_each_entry_safe(entry, tmp, &ctx.names, list) { + dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name); + list_del(&entry->list); + kfree(entry); + } return status; } @@ -541,8 +547,7 @@ nfsd4_legacy_tracking_init(struct net *net) /* XXX: The legacy code won't work in a container */ if (net != &init_net) { - WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " - "tracking in a container!\n"); + pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n"); return -EINVAL; } @@ -1254,8 +1259,7 @@ nfsd4_umh_cltrack_init(struct net *net) /* XXX: The usermode helper s not working in container yet. */ if (net != &init_net) { - WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " - "tracking in a container!\n"); + pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n"); return -EINVAL; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 95202719a1fd..0f1d5691b795 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -777,13 +777,16 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -static void +static bool unhash_delegation_locked(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; lockdep_assert_held(&state_lock); + if (list_empty(&dp->dl_perfile)) + return false; + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; @@ -792,16 +795,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); + return true; } static void destroy_delegation(struct nfs4_delegation *dp) { + bool unhashed; + spin_lock(&state_lock); - unhash_delegation_locked(dp); + unhashed = unhash_delegation_locked(dp); spin_unlock(&state_lock); - put_clnt_odstate(dp->dl_clnt_odstate); - nfs4_put_deleg_lease(dp->dl_stid.sc_file); - nfs4_put_stid(&dp->dl_stid); + if (unhashed) { + put_clnt_odstate(dp->dl_clnt_odstate); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); + nfs4_put_stid(&dp->dl_stid); + } } static void revoke_delegation(struct nfs4_delegation *dp) @@ -990,6 +998,12 @@ release_all_access(struct nfs4_ol_stateid *stp) } } +static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop) +{ + kfree(sop->so_owner.data); + sop->so_ops->so_free(sop); +} + static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { struct nfs4_client *clp = sop->so_client; @@ -1000,20 +1014,23 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) return; sop->so_ops->so_unhash(sop); spin_unlock(&clp->cl_lock); - kfree(sop->so_owner.data); - sop->so_ops->so_free(sop); + nfs4_free_stateowner(sop); } -static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + if (list_empty(&stp->st_perfile)) + return false; + spin_lock(&fp->fi_lock); - list_del(&stp->st_perfile); + list_del_init(&stp->st_perfile); spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); + return true; } static void nfs4_free_ol_stateid(struct nfs4_stid *stid) @@ -1063,25 +1080,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, list_add(&stp->st_locks, reaplist); } -static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); list_del_init(&stp->st_locks); - unhash_ol_stateid(stp); nfs4_unhash_stid(&stp->st_stid); + return unhash_ol_stateid(stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + bool unhashed; spin_lock(&oo->oo_owner.so_client->cl_lock); - unhash_lock_stateid(stp); + unhashed = unhash_lock_stateid(stp); spin_unlock(&oo->oo_owner.so_client->cl_lock); - nfs4_put_stid(&stp->st_stid); + if (unhashed) + nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner_locked(struct nfs4_lockowner *lo) @@ -1129,7 +1148,7 @@ static void release_lockowner(struct nfs4_lockowner *lo) while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); @@ -1142,21 +1161,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, { struct nfs4_ol_stateid *stp; + lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); + while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp, +static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { + bool unhashed; + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhash_ol_stateid(stp); + unhashed = unhash_ol_stateid(stp); release_open_stateid_locks(stp, reaplist); + return unhashed; } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -1164,8 +1188,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); } @@ -1210,8 +1234,8 @@ static void release_openowner(struct nfs4_openowner *oo) while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - unhash_open_stateid(stp, &reaplist); - put_ol_stateid_locked(stp, &reaplist); + if (unhash_open_stateid(stp, &reaplist)) + put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); @@ -1714,7 +1738,7 @@ __destroy_client(struct nfs4_client *clp) spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -1894,7 +1918,7 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) * __force to keep sparse happy */ verf[0] = (__force __be32)get_seconds(); - verf[1] = (__force __be32)nn->clientid_counter; + verf[1] = (__force __be32)nn->clverifier_counter++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } @@ -2241,6 +2265,9 @@ static bool client_has_state(struct nfs4_client *clp) * Also note we should probably be using this in 4.0 case too. */ return !list_empty(&clp->cl_openowners) +#ifdef CONFIG_NFSD_PNFS + || !list_empty(&clp->cl_lo_states) +#endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions); } @@ -2547,11 +2574,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status == nfserr_replay_cache) { - status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out_free_conn; - } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { - status = nfserr_seq_misordered; + if (status) { + if (status == nfserr_replay_cache) + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out_free_conn; } } else if (unconf) { @@ -3041,10 +3066,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); - if (conf && same_verf(&conf->cl_verifier, &clverifier)) + if (conf && same_verf(&conf->cl_verifier, &clverifier)) { /* case 1: probable callback update */ copy_clid(new, conf); - else /* case 4 (new client) or cases 2, 3 (client reboot): */ + gen_confirm(new, nn); + } else /* case 4 (new client) or cases 2, 3 (client reboot): */ gen_clid(new, nn); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); @@ -3085,10 +3111,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, /* * We try hard to give out unique clientid's, so if we get an * attempt to confirm the same clientid with a different cred, - * there's a bug somewhere. Let's charitably assume it's our - * bug. + * the client may be buggy; this should never happen. + * + * Nevertheless, RFC 7530 recommends INUSE for this case: */ - status = nfserr_serverfault; + status = nfserr_clid_inuse; if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) goto out; if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) @@ -3315,7 +3342,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, hash_openowner(oo, clp, strhashval); ret = oo; } else - nfs4_free_openowner(&oo->oo_owner); + nfs4_free_stateowner(&oo->oo_owner); + spin_unlock(&clp->cl_lock); return ret; } @@ -3482,6 +3510,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, { struct nfs4_delegation *dp = cb_to_delegation(cb); + if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID) + return 1; + switch (task->tk_status) { case 0: return 1; @@ -3885,12 +3916,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c return status; } -static void -nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) -{ - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; -} - /* Should we give out recallable state?: */ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) { @@ -3923,7 +3948,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; - struct file_lock *fl, *ret; + struct file_lock *fl; struct file *filp; int status = 0; @@ -3934,10 +3959,10 @@ static int nfs4_setlease(struct nfs4_delegation *dp) if (!filp) { /* We should always have a readable file here */ WARN_ON_ONCE(1); + locks_free_lock(fl); return -EBADF; } fl->fl_file = filp; - ret = fl; status = vfs_setlease(filp, fl->fl_type, &fl, NULL); if (fl) locks_free_lock(fl); @@ -4063,7 +4088,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... + * had the chance to reclaim theirs, *and* until + * NLM locks have all been reclaimed: */ if (locks_in_grace(clp->net)) goto out_no_deleg; @@ -4209,7 +4235,7 @@ out: if (fp) put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate)); + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; /* * To finish the open response, we just need to set the rflags. */ @@ -4338,14 +4364,12 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) - continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { t = dp->dl_time - cutoff; new_timeo = min(new_timeo, t); break; } - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -4440,7 +4464,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, { if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (locks_in_grace(net)) { + else if (opens_in_grace(net)) { /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -4459,7 +4483,7 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, static inline int grace_disallows_io(struct net *net, struct inode *inode) { - return locks_in_grace(net) && mandatory_lock(inode); + return opens_in_grace(net) && mandatory_lock(inode); } /* Returns true iff a is later than b: */ @@ -4751,7 +4775,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_for_locks(stp->st_stid.sc_file, lockowner(stp->st_stateowner))) break; - unhash_lock_stateid(stp); + WARN_ON(!unhash_lock_stateid(stp)); spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -4967,20 +4991,23 @@ out: static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; + bool unhashed; LIST_HEAD(reaplist); s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); - unhash_open_stateid(s, &reaplist); + unhashed = unhash_open_stateid(s, &reaplist); if (clp->cl_minorversion) { - put_ol_stateid_locked(s, &reaplist); + if (unhashed) + put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); - move_to_close_lru(s, clp->net); + if (unhashed) + move_to_close_lru(s, clp->net); } } @@ -5045,9 +5072,6 @@ out: return status; } - -#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) - static inline u64 end_offset(u64 start, u64 len) { @@ -5139,8 +5163,7 @@ nevermind: } static struct nfs4_lockowner * -find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, - struct nfs4_client *clp) +find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner) { unsigned int strhashval = ownerstr_hashval(owner); struct nfs4_stateowner *so; @@ -5158,13 +5181,12 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, } static struct nfs4_lockowner * -find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, - struct nfs4_client *clp) +find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner) { struct nfs4_lockowner *lo; spin_lock(&clp->cl_lock); - lo = find_lockowner_str_locked(clid, owner, clp); + lo = find_lockowner_str_locked(clp, owner); spin_unlock(&clp->cl_lock); return lo; } @@ -5208,14 +5230,14 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; spin_lock(&clp->cl_lock); - ret = find_lockowner_str_locked(&clp->cl_clientid, - &lock->lk_new_owner, clp); + ret = find_lockowner_str_locked(clp, &lock->lk_new_owner); if (ret == NULL) { list_add(&lo->lo_owner.so_strhash, &clp->cl_ownerstr_hashtbl[strhashval]); ret = lo; } else - nfs4_free_lockowner(&lo->lo_owner); + nfs4_free_stateowner(&lo->lo_owner); + spin_unlock(&clp->cl_lock); return ret; } @@ -5298,8 +5320,8 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, static int check_lock_length(u64 offset, u64 length) { - return ((length == 0) || ((length != NFS4_MAX_UINT64) && - LOFF_OVERFLOW(offset, length))); + return ((length == 0) || ((length != NFS4_MAX_UINT64) && + (length > ~offset))); } static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) @@ -5328,9 +5350,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_lockowner *lo; unsigned int strhashval; - lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl); + lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) { - strhashval = ownerstr_hashval(&lock->v.new.owner); + strhashval = ownerstr_hashval(&lock->lk_new_owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; @@ -5391,7 +5413,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ - memcpy(&lock->v.new.clientid, + memcpy(&lock->lk_new_clientid, &cstate->session->se_client->cl_clientid, sizeof(clientid_t)); @@ -5409,7 +5431,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, - &lock->v.new.clientid)) + &lock->lk_new_clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); @@ -5603,8 +5625,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, - cstate->clp); + lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -6019,7 +6040,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, struct list_head *collect, - void (*func)(struct nfs4_ol_stateid *)) + bool (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; struct nfs4_ol_stateid *stp, *st_next; @@ -6033,9 +6054,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, list_for_each_entry_safe(lst, lst_next, &stp->st_locks, st_locks) { if (func) { - func(lst); - nfsd_inject_add_lock_to_list(lst, - collect); + if (func(lst)) + nfsd_inject_add_lock_to_list(lst, + collect); } ++count; /* @@ -6305,7 +6326,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, continue; atomic_inc(&clp->cl_refcount); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } ++count; @@ -6584,6 +6605,7 @@ nfs4_state_start_net(struct net *net) return ret; nn->boot_time = get_seconds(); nn->grace_ended = false; + nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", @@ -6602,7 +6624,7 @@ nfs4_state_start(void) ret = set_callback_cred(); if (ret) return -ENOMEM; - laundry_wq = create_singlethread_workqueue("nfsd4"); + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; goto out_recovery; @@ -6635,7 +6657,7 @@ nfs4_state_shutdown_net(struct net *net) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation_locked(dp); + WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 75e0563c09d1..51c9e9ca39a4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2140,6 +2140,27 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } +static inline __be32 +nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type) +{ + __be32 *p; + + if (layout_type) { + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(layout_type); + } else { + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + } + + return 0; +} + #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID @@ -2205,6 +2226,39 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) return err; } +static __be32 +nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2) +{ + __be32 *p; + + if (bmval2) { + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); + } else if (bmval1) { + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + } else { + p = xdr_reserve_space(xdr, 8); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); + } + + return 0; +out_resource: + return nfserr_resource; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. @@ -2301,28 +2355,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ - if (bmval2) { - p = xdr_reserve_space(xdr, 16); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(bmval0); - *p++ = cpu_to_be32(bmval1); - *p++ = cpu_to_be32(bmval2); - } else if (bmval1) { - p = xdr_reserve_space(xdr, 12); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(bmval0); - *p++ = cpu_to_be32(bmval1); - } else { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(bmval0); - } + status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2); + if (status) + goto out; attrlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); @@ -2675,6 +2710,9 @@ out_acl: *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + struct kstat parent_stat; + u64 ino = stat.ino; + p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; @@ -2683,25 +2721,25 @@ out_acl: * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - dentry == exp->ex_path.mnt->mnt_root) - get_parent_attributes(exp, &stat); - p = xdr_encode_hyper(p, stat.ino); + dentry == exp->ex_path.mnt->mnt_root) { + err = get_parent_attributes(exp, &parent_stat); + if (err) + goto out_nfserr; + ino = parent_stat.ino; + } + p = xdr_encode_hyper(p, ino); } #ifdef CONFIG_NFSD_PNFS - if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) || - (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) { - if (exp->ex_layout_type) { - p = xdr_reserve_space(xdr, 8); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(1); - *p++ = cpu_to_be32(exp->ex_layout_type); - } else { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(0); - } + if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; + } + + if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) { + status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type); + if (status) + goto out; } if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) { @@ -2711,21 +2749,20 @@ out_acl: *p++ = cpu_to_be32(stat.blksize); } #endif /* CONFIG_NFSD_PNFS */ + if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2); + if (status) + goto out; + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, contextlen); if (status) goto out; } - if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - p = xdr_reserve_space(xdr, 16); - if (!p) - goto out_resource; - *p++ = cpu_to_be32(3); - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); - *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); - } attrlen = htonl(xdr->buf->len - attrlen_offset - 4); write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); @@ -3044,13 +3081,12 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ __be32 *p; if (!nfserr) { - p = xdr_reserve_space(xdr, 32); + p = xdr_reserve_space(xdr, 20); if (!p) return nfserr_resource; - p = encode_cinfo(p, &create->cr_cinfo); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(create->cr_bmval[0]); - *p++ = cpu_to_be32(create->cr_bmval[1]); + encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], + create->cr_bmval[1], create->cr_bmval[2]); } return nfserr; } @@ -3190,16 +3226,22 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) goto out; - p = xdr_reserve_space(xdr, 40); + p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; p = encode_cinfo(p, &open->op_cinfo); *p++ = cpu_to_be32(open->op_rflags); - *p++ = cpu_to_be32(2); - *p++ = cpu_to_be32(open->op_bmval[0]); - *p++ = cpu_to_be32(open->op_bmval[1]); - *p++ = cpu_to_be32(open->op_delegate_type); + nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], + open->op_bmval[2]); + if (nfserr) + goto out; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: break; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9277cc91c21b..ad4e2377dd63 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -391,6 +391,14 @@ static int nfsd_get_default_max_blksize(void) return ret; } +static struct svc_serv_ops nfsd_thread_sv_ops = { + .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, + .svo_module = THIS_MODULE, +}; + int nfsd_create_serv(struct net *net) { int error; @@ -405,7 +413,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - nfsd_last_thread, nfsd, THIS_MODULE); + &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; @@ -500,8 +508,8 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], - nthreads[i]); + err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } @@ -540,7 +548,8 @@ nfsd_svc(int nrservs, struct net *net) error = nfsd_startup_net(nrservs, net); if (error) goto out_destroy; - error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); + error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + NULL, nrservs); if (error) goto out_shutdown; /* We are holding a reference to nn->nfsd_serv which diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4874ce515fc1..583ffc13cae2 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -67,8 +67,8 @@ struct nfsd4_callback { struct rpc_message cb_msg; struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; + int cb_seq_status; int cb_status; - bool cb_update_seq_nr; bool cb_need_restart; }; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b5e077a6e7d4..45c04979e7b3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1249,12 +1249,6 @@ out_nfserr: #ifdef CONFIG_NFSD_V3 -static inline int nfsd_create_is_exclusive(int createmode) -{ - return createmode == NFS3_CREATE_EXCLUSIVE - || createmode == NFS4_CREATE_EXCLUSIVE4_1; -} - /* * NFSv3 and NFSv4 version of nfsd_create */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5be875e3e638..fee2451ae248 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -131,4 +131,10 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) return nfserrno(vfs_getattr(&p, stat)); } +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 864203c10dbc..dc634a55163b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -943,12 +943,18 @@ struct lock_manager_operations { struct lock_manager { struct list_head list; + /* + * NFSv4 and up also want opens blocked during the grace period; + * NLM doesn't care: + */ + bool block_opens; }; struct net; void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); int locks_in_grace(struct net *); +int opens_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 437ddb6c4aef..03d3b4c92d9f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -46,7 +46,7 @@ * */ struct cache_head { - struct cache_head * next; + struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received @@ -73,7 +73,7 @@ struct cache_detail_pipefs { struct cache_detail { struct module * owner; int hash_size; - struct cache_head ** hash_table; + struct hlist_head * hash_table; rwlock_t hash_lock; atomic_t inuse; /* active user-space update or lookup */ @@ -224,6 +224,11 @@ extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +/* Must store cache_detail in seq_file->private if using next three functions */ +extern void *cache_seq_start(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop(struct seq_file *file, void *p); + extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fae6fb947fc8..cc0fc712bb82 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -19,11 +19,6 @@ #include #include -/* - * This is the RPC server thread function prototype - */ -typedef int (*svc_thread_fn)(void *); - /* statistics for svc_pool structures */ struct svc_pool_stats { atomic_long_t packets; @@ -54,6 +49,25 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; +struct svc_serv; + +struct svc_serv_ops { + /* Callback to use when last thread exits. */ + void (*svo_shutdown)(struct svc_serv *, struct net *); + + /* function for service threads to run */ + int (*svo_function)(void *); + + /* queue up a transport for servicing */ + void (*svo_enqueue_xprt)(struct svc_xprt *); + + /* set up thread (or whatever) execution context */ + int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); + + /* optional module to count when adding threads (pooled svcs only) */ + struct module *svo_module; +}; + /* * RPC service. * @@ -85,16 +99,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - - void (*sv_shutdown)(struct svc_serv *serv, - struct net *net); - /* Callback to use when last thread - * exits. - */ - - struct module * sv_module; /* optional module to count when - * adding threads */ - svc_thread_fn sv_function; /* main function for threads */ + struct svc_serv_ops *sv_ops; /* server operations */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -422,6 +427,29 @@ struct svc_procedure { unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; +/* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +extern struct svc_pool_map svc_pool_map; + /* * Function prototypes. */ @@ -429,13 +457,17 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net)); + struct svc_serv_ops *); +struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, + struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); +void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); +unsigned int svc_pool_map_get(void); +void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net), - svc_thread_fn, struct module *); + struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cb94ee4181d4..d5ee6d8b7c58 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,13 +172,6 @@ struct svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ -#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD -#else -#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#endif - #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ @@ -187,6 +180,8 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, @@ -213,6 +208,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); +extern struct rpcrdma_read_chunk * + svc_rdma_get_read_chunk(struct rpcrdma_msg *); /* svc_rdma_transport.c */ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); @@ -225,7 +222,6 @@ extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); -extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); @@ -238,83 +234,4 @@ extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); -/* - * Returns the address of the first read chunk or if no read chunk is - * present - */ -static inline struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - - if (ch->rc_discrim == 0) - return NULL; - - return ch; -} - -/* - * Returns the address of the first read write array element or if no - * write array list is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != 0 - || rmsgp->rm_body.rm_chunks[1] == 0) - return NULL; - - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} - -/* - * Returns the address of the first reply array element or if no - * reply array is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *wr_ary; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply list may occur with read-list and/or - * write list */ - if (rmsgp->rm_body.rm_chunks[0] != 0 || - rmsgp->rm_body.rm_chunks[1] != 0) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim) - rch++; - - /* The reply list follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - - goto found_it; - } - - wr_ary = svc_rdma_get_write_array(rmsgp); - if (wr_ary) { - rp_ary = (struct rpcrdma_write_array *) - &wr_ary-> - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; - - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == 0) - return NULL; - - return rp_ary; -} #endif diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 79f6f8f3dc0a..78512cfe1fe6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -116,6 +116,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); +void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fd1a02cb3c82..003dca933803 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -529,18 +529,21 @@ TRACE_EVENT(svc_xprt_do_enqueue, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) - __field(struct svc_rqst *, rqst) + __field_struct(struct sockaddr_storage, ss) + __field(int, pid) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; - __entry->rqst = rqst; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); + __entry->pid = rqst? rqst->rq_task->pid : 0; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, - __entry->rqst ? __entry->rqst->rq_task->pid : 0, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); TRACE_EVENT(svc_xprt_dequeue, @@ -589,16 +592,20 @@ TRACE_EVENT(svc_handle_xprt, TP_STRUCT__entry( __field(struct svc_xprt *, xprt) __field(int, len) + __field_struct(struct sockaddr_storage, ss) + __field(unsigned long, flags) ), TP_fast_assign( __entry->xprt = xprt; + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); __entry->len = len; + __entry->flags = xprt ? xprt->xpt_flags : 0; ), TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, - (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, - show_svc_xprt_flags(__entry->xprt->xpt_flags)) + (struct sockaddr *)&__entry->ss, + __entry->len, show_svc_xprt_flags(__entry->flags)) ); #endif /* _TRACE_SUNRPC_H */ diff --git a/include/uapi/linux/nfsacl.h b/include/uapi/linux/nfsacl.h index 9bb9771a107f..552726631162 100644 --- a/include/uapi/linux/nfsacl.h +++ b/include/uapi/linux/nfsacl.h @@ -22,6 +22,7 @@ #define NFS_ACLCNT 0x0002 #define NFS_DFACL 0x0004 #define NFS_DFACLCNT 0x0008 +#define NFS_ACL_MASK 0x000f /* Flag for Default ACL entries */ #define NFS_ACL_DEFAULT 0x1000 diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2928afffbb81..4a2340a54401 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -44,7 +44,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { time_t now = seconds_since_boot(); - h->next = NULL; + INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; @@ -54,15 +54,14 @@ static void cache_init(struct cache_head *h) struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { - struct cache_head **head, **hp; - struct cache_head *new = NULL, *freeme = NULL; + struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL; + struct hlist_head *head; head = &detail->hash_table[hash]; read_lock(&detail->hash_lock); - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { - struct cache_head *tmp = *hp; + hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) /* This entry is expired, we will discard it. */ @@ -88,12 +87,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, write_lock(&detail->hash_lock); /* check if entry appeared while we slept */ - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { - struct cache_head *tmp = *hp; + hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - *hp = tmp->next; - tmp->next = NULL; + hlist_del_init(&tmp->cache_list); detail->entries --; freeme = tmp; break; @@ -104,8 +101,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return tmp; } } - new->next = *head; - *head = new; + + hlist_add_head(&new->cache_list, head); detail->entries++; cache_get(new); write_unlock(&detail->hash_lock); @@ -143,7 +140,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, * If 'old' is not VALID, we update it directly, * otherwise we need to replace it */ - struct cache_head **head; struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { @@ -168,15 +164,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } cache_init(tmp); detail->init(tmp, old); - head = &detail->hash_table[hash]; write_lock(&detail->hash_lock); if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &tmp->flags); else detail->update(tmp, new); - tmp->next = *head; - *head = tmp; + hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time); @@ -416,28 +410,29 @@ static int cache_clean(void) /* find a non-empty bucket in the table */ while (current_detail && current_index < current_detail->hash_size && - current_detail->hash_table[current_index] == NULL) + hlist_empty(¤t_detail->hash_table[current_index])) current_index++; /* find a cleanable entry in the bucket and clean it, or set to next bucket */ if (current_detail && current_index < current_detail->hash_size) { - struct cache_head *ch, **cp; + struct cache_head *ch = NULL; struct cache_detail *d; + struct hlist_head *head; + struct hlist_node *tmp; write_lock(¤t_detail->hash_lock); /* Ok, now to clean this strand */ - cp = & current_detail->hash_table[current_index]; - for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { + head = ¤t_detail->hash_table[current_index]; + hlist_for_each_entry_safe(ch, tmp, head, cache_list) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (!cache_is_expired(current_detail, ch)) continue; - *cp = ch->next; - ch->next = NULL; + hlist_del_init(&ch->cache_list); current_detail->entries--; rv = 1; break; @@ -1270,18 +1265,13 @@ EXPORT_SYMBOL_GPL(qword_get); * get a header, then pass each real item in the cache */ -struct handle { - struct cache_detail *cd; -}; - -static void *c_start(struct seq_file *m, loff_t *pos) +void *cache_seq_start(struct seq_file *m, loff_t *pos) __acquires(cd->hash_lock) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; - struct cache_detail *cd = ((struct handle*)m->private)->cd; - + struct cache_detail *cd = m->private; read_lock(&cd->hash_lock); if (!n--) @@ -1289,7 +1279,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) hash = n >> 32; entry = n & ((1LL<<32) - 1); - for (ch=cd->hash_table[hash]; ch; ch=ch->next) + hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); @@ -1297,51 +1287,57 @@ static void *c_start(struct seq_file *m, loff_t *pos) hash++; n += 1LL<<32; } while(hash < cd->hash_size && - cd->hash_table[hash]==NULL); + hlist_empty(&cd->hash_table[hash])); if (hash >= cd->hash_size) return NULL; *pos = n+1; - return cd->hash_table[hash]; + return hlist_entry_safe(cd->hash_table[hash].first, + struct cache_head, cache_list); } +EXPORT_SYMBOL_GPL(cache_seq_start); -static void *c_next(struct seq_file *m, void *p, loff_t *pos) +void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) hash = 0; - else if (ch->next == NULL) { + else if (ch->cache_list.next == NULL) { hash++; *pos += 1LL<<32; } else { ++*pos; - return ch->next; + return hlist_entry_safe(ch->cache_list.next, + struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); while (hash < cd->hash_size && - cd->hash_table[hash] == NULL) { + hlist_empty(&cd->hash_table[hash])) { hash++; *pos += 1LL<<32; } if (hash >= cd->hash_size) return NULL; ++*pos; - return cd->hash_table[hash]; + return hlist_entry_safe(cd->hash_table[hash].first, + struct cache_head, cache_list); } +EXPORT_SYMBOL_GPL(cache_seq_next); -static void c_stop(struct seq_file *m, void *p) +void cache_seq_stop(struct seq_file *m, void *p) __releases(cd->hash_lock) { - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; read_unlock(&cd->hash_lock); } +EXPORT_SYMBOL_GPL(cache_seq_stop); static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) return cd->cache_show(m, cd, NULL); @@ -1364,33 +1360,36 @@ static int c_show(struct seq_file *m, void *p) } static const struct seq_operations cache_content_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, + .start = cache_seq_start, + .next = cache_seq_next, + .stop = cache_seq_stop, .show = c_show, }; static int content_open(struct inode *inode, struct file *file, struct cache_detail *cd) { - struct handle *han; + struct seq_file *seq; + int err; if (!cd || !try_module_get(cd->owner)) return -EACCES; - han = __seq_open_private(file, &cache_content_op, sizeof(*han)); - if (han == NULL) { + + err = seq_open(file, &cache_content_op); + if (err) { module_put(cd->owner); - return -ENOMEM; + return err; } - han->cd = cd; + seq = file->private_data; + seq->private = cd; return 0; } static int content_release(struct inode *inode, struct file *file, struct cache_detail *cd) { - int ret = seq_release_private(inode, file); + int ret = seq_release(inode, file); module_put(cd->owner); return ret; } @@ -1665,17 +1664,21 @@ EXPORT_SYMBOL_GPL(cache_unregister_net); struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; + int i; cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); - cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), + cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head), GFP_KERNEL); if (cd->hash_table == NULL) { kfree(cd); return ERR_PTR(-ENOMEM); } + + for (i = 0; i < cd->hash_size; i++) + INIT_HLIST_HEAD(&cd->hash_table[i]); cd->net = net; return cd; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a16d8d8c831..a8f579df14d8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -34,36 +34,19 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_function) +#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ -static struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -} svc_pool_map = { - .count = 0, +struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; +EXPORT_SYMBOL_GPL(svc_pool_map); + static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ static int @@ -236,7 +219,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) * vice versa). Initialise the map if we're the first user. * Returns the number of pools. */ -static unsigned int +unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -271,7 +254,7 @@ svc_pool_map_get(void) mutex_unlock(&svc_pool_map_mutex); return m->npools; } - +EXPORT_SYMBOL_GPL(svc_pool_map_get); /* * Drop a reference to the global map of cpus to pools. @@ -280,7 +263,7 @@ svc_pool_map_get(void) * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. */ -static void +void svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; @@ -297,7 +280,7 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } - +EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -423,7 +406,7 @@ EXPORT_SYMBOL_GPL(svc_bind); */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -440,7 +423,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_shutdown = shutdown; + serv->sv_ops = ops; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -486,26 +469,22 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { - return __svc_create(prog, bufsize, /*npools*/1, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, ops); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net), - svc_thread_fn func, struct module *mod) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, shutdown); + serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; - - serv->sv_function = func; - serv->sv_module = mod; return serv; out_err: svc_pool_map_put(); @@ -517,8 +496,8 @@ void svc_shutdown_net(struct svc_serv *serv, struct net *net) { svc_close_net(serv, net); - if (serv->sv_shutdown) - serv->sv_shutdown(serv, net); + if (serv->sv_ops->svo_shutdown) + serv->sv_ops->svo_shutdown(serv, net); } EXPORT_SYMBOL_GPL(svc_shutdown_net); @@ -604,40 +583,52 @@ svc_release_buffer(struct svc_rqst *rqstp) } struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) +svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); if (!rqstp) - goto out_enomem; + return rqstp; - serv->sv_nrthreads++; __set_bit(RQ_BUSY, &rqstp->rq_flags); spin_lock_init(&rqstp->rq_lock); rqstp->rq_server = serv; rqstp->rq_pool = pool; + + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); + if (!rqstp->rq_argp) + goto out_enomem; + + rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); + if (!rqstp->rq_resp) + goto out_enomem; + + if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) + goto out_enomem; + + return rqstp; +out_enomem: + svc_rqst_free(rqstp); + return NULL; +} +EXPORT_SYMBOL_GPL(svc_rqst_alloc); + +struct svc_rqst * +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) +{ + struct svc_rqst *rqstp; + + rqstp = svc_rqst_alloc(serv, pool, node); + if (!rqstp) + return ERR_PTR(-ENOMEM); + + serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); - - rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); - if (!rqstp->rq_argp) - goto out_thread; - - rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); - if (!rqstp->rq_resp) - goto out_thread; - - if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) - goto out_thread; - return rqstp; -out_thread: - svc_exit_thread(rqstp); -out_enomem: - return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(svc_prepare_thread); @@ -739,12 +730,12 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) break; } - __module_get(serv->sv_module); - task = kthread_create_on_node(serv->sv_function, rqstp, + __module_get(serv->sv_ops->svo_module); + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); - module_put(serv->sv_module); + module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); break; } @@ -772,15 +763,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * mutex" for the service. */ void -svc_exit_thread(struct svc_rqst *rqstp) +svc_rqst_free(struct svc_rqst *rqstp) { - struct svc_serv *serv = rqstp->rq_server; - struct svc_pool *pool = rqstp->rq_pool; - svc_release_buffer(rqstp); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); + kfree_rcu(rqstp, rq_rcu_head); +} +EXPORT_SYMBOL_GPL(svc_rqst_free); + +void +svc_exit_thread(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + struct svc_pool *pool = rqstp->rq_pool; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads--; @@ -788,7 +785,7 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - kfree_rcu(rqstp, rq_rcu_head); + svc_rqst_free(rqstp); /* Release the server */ if (serv) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 163ac45c3639..a6cbb2104667 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -24,7 +24,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); static void svc_delete_xprt(struct svc_xprt *xprt); -static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -225,12 +224,12 @@ static void svc_xprt_received(struct svc_xprt *xprt) } /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_do_enqueue with: + * 'put', so we need a reference to call svc_enqueue_xprt with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } @@ -320,7 +319,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) return false; } -static void svc_xprt_do_enqueue(struct svc_xprt *xprt) +void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -402,6 +401,7 @@ redo_search: out: trace_svc_xprt_do_enqueue(xprt, rqstp); } +EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); /* * Queue up a transport with data pending. If there are idle nfsd @@ -412,7 +412,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) { if (test_bit(XPT_BUSY, &xprt->xpt_flags)) return; - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d25cd430f9ff..1dfae8317065 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -136,6 +136,79 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, return dma_addr; } +/* Returns the address of the first read chunk or if no read chunk + * is present + */ +struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == xdr_zero) + return NULL; + return ch; +} + +/* Returns the address of the first read write array element or + * if no write array list is present + */ +static struct rpcrdma_write_array * +svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) +{ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] == xdr_zero) + return NULL; + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; +} + +/* Returns the address of the first reply array element or if no + * reply array is present + */ +static struct rpcrdma_write_array * +svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *rch; + struct rpcrdma_write_array *wr_ary; + struct rpcrdma_write_array *rp_ary; + + /* XXX: Need to fix when reply chunk may occur with read list + * and/or write list. + */ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] != xdr_zero) + return NULL; + + rch = svc_rdma_get_read_chunk(rmsgp); + if (rch) { + while (rch->rc_discrim != xdr_zero) + rch++; + + /* The reply chunk follows an empty write array located + * at 'rc_position' here. The reply array is at rc_target. + */ + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; + goto found_it; + } + + wr_ary = svc_rdma_get_write_array(rmsgp); + if (wr_ary) { + int chunk = be32_to_cpu(wr_ary->wc_nchunks); + + rp_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_array[chunk].wc_target.rs_length; + goto found_it; + } + + /* No read list, no write list */ + rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; + + found_it: + if (rp_ary->wc_discrim == xdr_zero) + return NULL; + return rp_ary; +} + /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ @@ -384,6 +457,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; + u32 xdr_off; int sge_no; int sge_bytes; int page_no; @@ -418,8 +492,8 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->direction = DMA_TO_DEVICE; /* Map the payload indicated by 'byte_count' */ + xdr_off = 0; for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { - int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; ctxt->sge[sge_no].addr = @@ -457,6 +531,13 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; + /* The loop above bumps sc_dma_used for each sge. The + * xdr_buf.tail gets a separate sge, but resides in the + * same page as xdr_buf.head. Don't count it twice. + */ + if (sge_no > ctxt->count) + atomic_dec(&rdma->sc_dma_used); + if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6b36279e4288..21e40365042c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_name = "rdma", .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, - .xcl_max_payload = RPCRDMA_MAXPAYLOAD, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, .xcl_ident = XPRT_TRANSPORT_RDMA, }; @@ -659,6 +659,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, if (xprt) { set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); } break; default: @@ -1201,40 +1202,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } -/* - * Attempt to register the kvec representing the RPC memory with the - * device. - * - * Returns: - * NULL : The device does not support fastreg or there were no more - * fastreg mr. - * frmr : The kvec register request was successfully posted. - * <0 : An error was encountered attempting to register the kvec. - */ -int svc_rdma_fastreg(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - struct ib_send_wr fastreg_wr; - u8 key; - - /* Bump the key */ - key = (u8)(frmr->mr->lkey & 0x000000FF); - ib_update_fast_reg_key(frmr->mr, ++key); - - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof fastreg_wr); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - return svc_rdma_send(xprt, &fastreg_wr); -} - int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f49dd8b38122..e718d0959af3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,7 +51,6 @@ #include /* rpc_xprt */ #include /* RPC/RDMA protocol */ #include /* xprt parameters */ -#include /* RPCSVC_MAXPAYLOAD */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */