From 0ab88ca4bcf18ba21058d8f19220f60afe0d34d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2019 15:07:11 +0100 Subject: [PATCH 01/23] nfsd: avoid uninitialized variable warning clang warns that 'contextlen' may be accessed without an initialization: fs/nfsd/nfs4xdr.c:2911:9: error: variable 'contextlen' is uninitialized when used here [-Werror,-Wuninitialized] contextlen); ^~~~~~~~~~ fs/nfsd/nfs4xdr.c:2424:16: note: initialize the variable 'contextlen' to silence this warning int contextlen; ^ = 0 Presumably this cannot happen, as FATTR4_WORD2_SECURITY_LABEL is set if CONFIG_NFSD_V4_SECURITY_LABEL is enabled. Adding another #ifdef like the other two in this function avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3de42a729093..a3a3455826aa 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2420,8 +2420,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, __be32 status; int err; struct nfs4_acl *acl = NULL; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL void *context = NULL; int contextlen; +#endif bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; @@ -2906,12 +2908,14 @@ out_acl: *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, contextlen); if (status) goto out; } +#endif attrlen = htonl(xdr->buf->len - attrlen_offset - 4); write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); From 9d69338c8c5fc68e9ae3ab23d01356ca024b789e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 22 Mar 2019 13:16:56 +1100 Subject: [PATCH 02/23] sunrpc/cache: handle missing listeners better. If no handler (such as rpc.mountd) has opened a cache 'channel', the sunrpc cache responds to all lookup requests with -ENOENT. This is particularly important for the auth.unix.gid cache which is optional. If the channel was open briefly and an upcall was written to it, this upcall remains pending even when the handler closes the channel. When an upcall is pending, the code currently doesn't check if there are still listeners, it only performs that check before sending an upcall. As the cache treads a recently closes channel (closed less than 30 seconds ago) as "potentially still open", there is a reasonable sized window when a request can become pending in a closed channel, and thereby block lookups indefinitely. This can easily be demonstrated by running cat /proc/net/rpc/auth.unix.gid/channel and then trying to mount an NFS filesystem from this host. It will block indefinitely (unless mountd is run with --manage-gids, or krb5 is used). When cache_check() finds that an upcall is pending, it should perform the "cache_listeners_exist()" exist test. If no listeners do exist, the request should be negated. With this change in place, there can still be a 30second wait on mount, until the cache gives up waiting for a handler to come back, but this is much better than an indefinite wait. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 261131dfa1f1..d22328984853 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -40,6 +40,7 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); +static bool cache_listeners_exist(struct cache_detail *detail); static void cache_init(struct cache_head *h, struct cache_detail *detail) { @@ -306,7 +307,8 @@ int cache_check(struct cache_detail *detail, cache_fresh_unlocked(h, detail); break; } - } + } else if (!cache_listeners_exist(detail)) + rv = try_to_negate_entry(detail, h); } if (rv == -EAGAIN) { From 6b1891052a3f8e3c3217e8512bbed2fd6252977b Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 26 Mar 2019 18:06:26 -0400 Subject: [PATCH 03/23] nfsd: make nfs4_client_reclaim use an xdr_netobj instead of a fixed char array This will allow the reclaim_str_hashtbl to store either the recovery directory names used by the legacy client tracking code or the full client strings used by the nfsdcld client tracking code. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 109 +++++++++++++++++++++++++++++++++++------- fs/nfsd/nfs4state.c | 30 ++++++------ fs/nfsd/state.h | 8 ++-- 3 files changed, 110 insertions(+), 37 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5188f9f70c78..2243b909b407 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -169,13 +169,34 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) } } +static void +__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp, + const char *dname, int len, struct nfsd_net *nn) +{ + struct xdr_netobj name; + struct nfs4_client_reclaim *crp; + + name.data = kmemdup(dname, len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return; + } + name.len = len; + crp = nfs4_client_to_reclaim(name, nn); + if (!crp) { + kfree(name.data); + return; + } + crp->cr_clp = clp; +} + static void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; - struct nfs4_client_reclaim *crp; int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -221,11 +242,9 @@ out_put: out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { - if (nn->in_grace) { - crp = nfs4_client_to_reclaim(dname, nn); - if (crp) - crp->cr_clp = clp; - } + if (nn->in_grace) + __nfsd4_create_reclaim_record_grace(clp, dname, + HEXDIR_LEN, nn); vfs_fsync(nn->rec_file, 0); } else { printk(KERN_ERR "NFSD: failed to write recovery record" @@ -345,11 +364,30 @@ out_unlock: return status; } +static void +__nfsd4_remove_reclaim_record_grace(const char *dname, int len, + struct nfsd_net *nn) +{ + struct xdr_netobj name; + struct nfs4_client_reclaim *crp; + + name.data = kmemdup(dname, len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return; + } + name.len = len; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); + if (crp) + nfs4_remove_reclaim_record(crp, nn); +} + static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; - struct nfs4_client_reclaim *crp; char dname[HEXDIR_LEN]; int status; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -374,12 +412,9 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) nfs4_reset_creds(original_cred); if (status == 0) { vfs_fsync(nn->rec_file, 0); - if (nn->in_grace) { - /* remove reclaim record */ - crp = nfsd4_find_reclaim_client(dname, nn); - if (crp) - nfs4_remove_reclaim_record(crp, nn); - } + if (nn->in_grace) + __nfsd4_remove_reclaim_record_grace(dname, + HEXDIR_LEN, nn); } out_drop_write: mnt_drop_write_file(nn->rec_file); @@ -393,14 +428,31 @@ static int purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; + struct xdr_netobj name; - if (nfs4_has_reclaimed_state(child->d_name.name, nn)) + if (child->d_name.len != HEXDIR_LEN - 1) { + printk("%s: illegal name %pd in recovery directory\n", + __func__, child); + /* Keep trying; maybe the others are OK: */ return 0; + } + name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out; + } + name.len = HEXDIR_LEN; + if (nfs4_has_reclaimed_state(name, nn)) + goto out_free; status = vfs_rmdir(d_inode(parent), child); if (status) printk("failed to remove client recovery directory %pd\n", child); +out_free: + kfree(name.data); +out: /* Keep trying, success or failure: */ return 0; } @@ -430,13 +482,24 @@ out: static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { + struct xdr_netobj name; + if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %pd in recovery directory\n", - child); + printk("%s: illegal name %pd in recovery directory\n", + __func__, child); /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name, nn); + name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out; + } + name.len = HEXDIR_LEN; + if (!nfs4_client_to_reclaim(name, nn)) + kfree(name.data); +out: return 0; } @@ -616,6 +679,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) char dname[HEXDIR_LEN]; struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct xdr_netobj name; /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -628,13 +692,22 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) } /* look for it in the reclaim hashtable otherwise */ - crp = nfsd4_find_reclaim_client(dname, nn); + name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + goto out_enoent; + } + name.len = HEXDIR_LEN; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); if (crp) { set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); crp->cr_clp = clp; return 0; } +out_enoent: return -ENOENT; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f056b1d3fecd..e601416aaf5f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1067,9 +1067,9 @@ static unsigned int clientid_hashval(u32 id) return id & CLIENT_HASH_MASK; } -static unsigned int clientstr_hashval(const char *name) +static unsigned int clientstr_hashval(struct xdr_netobj name) { - return opaque_hashval(name, 8) & CLIENT_HASH_MASK; + return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK; } /* @@ -2048,11 +2048,6 @@ compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) return memcmp(o1->data, o2->data, o1->len); } -static int same_name(const char *n1, const char *n2) -{ - return 0 == memcmp(n1, n2, HEXDIR_LEN); -} - static int same_verf(nfs4_verifier *v1, nfs4_verifier *v2) { @@ -6457,7 +6452,7 @@ alloc_reclaim(void) } bool -nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) +nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client_reclaim *crp; @@ -6467,20 +6462,24 @@ nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) /* * failure => all reset bets are off, nfserr_no_grace... + * + * The caller is responsible for freeing name.data if NULL is returned (it + * will be freed in nfs4_remove_reclaim_record in the normal case). */ struct nfs4_client_reclaim * -nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) +nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data); crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); INIT_LIST_HEAD(&crp->cr_strhash); list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_name.data = name.data; + crp->cr_name.len = name.len; crp->cr_clp = NULL; nn->reclaim_str_hashtbl_size++; } @@ -6491,6 +6490,7 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) { list_del(&crp->cr_strhash); + kfree(crp->cr_name.data); kfree(crp); nn->reclaim_str_hashtbl_size--; } @@ -6514,16 +6514,16 @@ nfs4_release_reclaim(struct nfsd_net *nn) /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ struct nfs4_client_reclaim * -nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) +nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); + dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data); - strhashval = clientstr_hashval(recdir); + strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, recdir)) { + if (compare_blob(&crp->cr_name, &name) == 0) { return crp; } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9d6cb246c6c5..0b74d371ed67 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -368,7 +368,7 @@ struct nfs4_client { struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ struct nfs4_client *cr_clp; /* pointer to associated clp */ - char cr_recdir[HEXDIR_LEN]; /* recover dir */ + struct xdr_netobj cr_name; /* recovery dir name */ }; /* A reasonable value for REPLAY_ISIZE was estimated as follows: @@ -620,7 +620,7 @@ void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); -extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); @@ -635,9 +635,9 @@ extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); -extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn); -extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); From 74725959c33c14114fdce1e36e3504d106584d53 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 26 Mar 2019 18:06:27 -0400 Subject: [PATCH 04/23] nfsd: un-deprecate nfsdcld When nfsdcld was released, it was quickly deprecated in favor of the nfsdcltrack usermodehelper, so as to not require another running daemon. That prevents NFSv4 clients from reclaiming locks from nfsd's running in containers, since neither nfsdcltrack nor the legacy client tracking code work in containers. This commit un-deprecates the use of nfsdcld, with one twist: we will populate the reclaim_str_hashtbl on startup. During client tracking initialization, do an upcall ("GraceStart") to nfsdcld to get a list of clients from the database. nfsdcld will do one downcall with a status of -EINPROGRESS for each client record in the database, which in turn will cause an nfs4_client_reclaim to be added to the reclaim_str_hashtbl. When complete, nfsdcld will do a final downcall with a status of 0. This will save nfsd from having to do an upcall to the daemon during nfs4_check_open_reclaim() processing. Even though nfsdcld was quickly deprecated, there is a very small chance of old nfsdcld daemons running in the wild. These will respond to the new "GraceStart" upcall with -EOPNOTSUPP, in which case we will log a message and fall back to the original nfsdcld tracking ops (now called nfsd4_cld_tracking_ops_v0). Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 225 ++++++++++++++++++++++++++++++++-- include/uapi/linux/nfsd/cld.h | 1 + 2 files changed, 218 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2243b909b407..89c2a27956d0 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -779,6 +779,34 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) return ret; } +static ssize_t +__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg, + struct nfsd_net *nn) +{ + uint8_t cmd; + struct xdr_netobj name; + uint16_t namelen; + + if (get_user(cmd, &cmsg->cm_cmd)) { + dprintk("%s: error when copying cmd from userspace", __func__); + return -EFAULT; + } + if (cmd == Cld_GraceStart) { + if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len)) + return -EFAULT; + name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen); + if (IS_ERR_OR_NULL(name.data)) + return -EFAULT; + name.len = namelen; + if (!nfs4_client_to_reclaim(name, nn)) { + kfree(name.data); + return -EFAULT; + } + return sizeof(*cmsg); + } + return -EFAULT; +} + static ssize_t cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { @@ -788,6 +816,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info, nfsd_net_id); struct cld_net *cn = nn->cld_net; + int16_t status; if (mlen != sizeof(*cmsg)) { dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, @@ -801,13 +830,24 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) return -EFAULT; } + /* + * copy the status so we know whether to remove the upcall from the + * list (for -EINPROGRESS, we just want to make sure the xid is + * valid, not remove the upcall from the list) + */ + if (get_user(status, &cmsg->cm_status)) { + dprintk("%s: error when copying status from userspace", __func__); + return -EFAULT; + } + /* walk the list and find corresponding xid */ cup = NULL; spin_lock(&cn->cn_lock); list_for_each_entry(tmp, &cn->cn_list, cu_list) { if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { cup = tmp; - list_del_init(&cup->cu_list); + if (status != -EINPROGRESS) + list_del_init(&cup->cu_list); break; } } @@ -819,6 +859,9 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) return -EINVAL; } + if (status == -EINPROGRESS) + return __cld_pipe_inprogress_downcall(cmsg, nn); + if (copy_from_user(&cup->cu_msg, src, mlen) != 0) return -EFAULT; @@ -1065,9 +1108,14 @@ out_err: "record from stable storage: %d\n", ret); } -/* Check for presence of a record, and update its timestamp */ +/* + * For older nfsdcld's that do not allow us to "slurp" the clients + * from the tracking database during startup. + * + * Check for presence of a record, and update its timestamp + */ static int -nfsd4_cld_check(struct nfs4_client *clp) +nfsd4_cld_check_v0(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; @@ -1100,8 +1148,61 @@ nfsd4_cld_check(struct nfs4_client *clp) return ret; } +/* + * For newer nfsdcld's that allow us to "slurp" the clients + * from the tracking database during startup. + * + * Check for presence of a record in the reclaim_str_hashtbl + */ +static int +nfsd4_cld_check(struct nfs4_client *clp) +{ + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + /* did we already find that this client is stable? */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + /* look for it in the reclaim hashtable otherwise */ + crp = nfsd4_find_reclaim_client(clp->cl_name, nn); + if (crp) { + crp->cr_clp = clp; + return 0; + } + + return -ENOENT; +} + +static int +nfsd4_cld_grace_start(struct nfsd_net *nn) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceStart; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + if (ret) + dprintk("%s: Unable to get clients from userspace: %d\n", + __func__, ret); + return ret; +} + +/* For older nfsdcld's that need cm_gracetime */ static void -nfsd4_cld_grace_done(struct nfsd_net *nn) +nfsd4_cld_grace_done_v0(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; @@ -1125,11 +1226,118 @@ out_err: printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); } -static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { +/* + * For newer nfsdcld's that do not need cm_gracetime. We also need to call + * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl. + */ +static void +nfsd4_cld_grace_done(struct nfsd_net *nn) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceDone; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + nfs4_release_reclaim(nn); + if (ret) + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); +} + +static int +nfs4_cld_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, + sizeof(struct list_head), + GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + + return 0; +} + +static void +nfs4_cld_state_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + kfree(nn->reclaim_str_hashtbl); +} + +static int +nfsd4_cld_tracking_init(struct net *net) +{ + int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + status = nfs4_cld_state_init(net); + if (status) + return status; + + status = nfsd4_init_cld_pipe(net); + if (status) + goto err_shutdown; + + status = nfsd4_cld_grace_start(nn); + if (status) { + if (status == -EOPNOTSUPP) + printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n"); + nfs4_release_reclaim(nn); + goto err_remove; + } + return 0; + +err_remove: + nfsd4_remove_cld_pipe(net); +err_shutdown: + nfs4_cld_state_shutdown(net); + return status; +} + +static void +nfsd4_cld_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_remove_cld_pipe(net); + nfs4_cld_state_shutdown(net); +} + +/* For older nfsdcld's */ +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = { .init = nfsd4_init_cld_pipe, .exit = nfsd4_remove_cld_pipe, .create = nfsd4_cld_create, .remove = nfsd4_cld_remove, + .check = nfsd4_cld_check_v0, + .grace_done = nfsd4_cld_grace_done_v0, +}; + +/* For newer nfsdcld's */ +static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { + .init = nfsd4_cld_tracking_init, + .exit = nfsd4_cld_tracking_exit, + .create = nfsd4_cld_create, + .remove = nfsd4_cld_remove, .check = nfsd4_cld_check, .grace_done = nfsd4_cld_grace_done, }; @@ -1514,9 +1722,10 @@ nfsd4_client_tracking_init(struct net *net) /* Finally, try to use nfsdcld */ nn->client_tracking_ops = &nfsd4_cld_tracking_ops; - printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " - "removed in 3.10. Please transition to using " - "nfsdcltrack.\n"); + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; do_init: status = nn->client_tracking_ops->init(net); if (status) { diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h index f8f5cccad749..b1e9de4f07d5 100644 --- a/include/uapi/linux/nfsd/cld.h +++ b/include/uapi/linux/nfsd/cld.h @@ -36,6 +36,7 @@ enum cld_command { Cld_Remove, /* remove record of this cm_id */ Cld_Check, /* is this cm_id allowed? */ Cld_GraceDone, /* grace period is complete */ + Cld_GraceStart, }; /* representation of long-form NFSv4 client ID */ From 362063a595be959bc08f4163e6405a0266740091 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 26 Mar 2019 18:06:28 -0400 Subject: [PATCH 05/23] nfsd: keep a tally of RECLAIM_COMPLETE operations when using nfsdcld When using nfsdcld for NFSv4 client tracking, track the number of RECLAIM_COMPLETE operations we receive from "known" clients to help in deciding if we can lift the grace period early (or whether we need to start a v4 grace period at all). Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 3 +++ fs/nfsd/nfs4recover.c | 3 +++ fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++++++++++++- fs/nfsd/nfsctl.c | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 32cb8c027483..b42aaa22fba2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -104,6 +104,9 @@ struct nfsd_net { time_t nfsd4_grace; bool somebody_reclaimed; + bool track_reclaim_completes; + atomic_t nr_reclaim_complete; + bool nfsd_net_up; bool lockd_up; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 89c2a27956d0..09e3c9352778 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1270,6 +1270,8 @@ nfs4_cld_state_init(struct net *net) for (i = 0; i < CLIENT_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); nn->reclaim_str_hashtbl_size = 0; + nn->track_reclaim_completes = true; + atomic_set(&nn->nr_reclaim_complete, 0); return 0; } @@ -1279,6 +1281,7 @@ nfs4_cld_state_shutdown(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nn->track_reclaim_completes = false; kfree(nn->reclaim_str_hashtbl); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e601416aaf5f..8078314981f5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -77,6 +77,7 @@ static u64 current_sessionid = 1; /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); +void nfsd4_end_grace(struct nfsd_net *nn); /* Locking: */ @@ -1997,6 +1998,22 @@ destroy_client(struct nfs4_client *clp) __destroy_client(clp); } +static void inc_reclaim_complete(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!nn->track_reclaim_completes) + return; + if (!nfsd4_find_reclaim_client(clp->cl_name, nn)) + return; + if (atomic_inc_return(&nn->nr_reclaim_complete) == + nn->reclaim_str_hashtbl_size) { + printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", + clp->net->ns.inum); + nfsd4_end_grace(nn); + } +} + static void expire_client(struct nfs4_client *clp) { unhash_client(clp); @@ -3348,6 +3365,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, status = nfs_ok; nfsd4_client_record_create(cstate->session->se_client); + inc_reclaim_complete(cstate->session->se_client); out: return status; } @@ -4707,7 +4725,6 @@ nfsd4_end_grace(struct nfsd_net *nn) if (nn->grace_ended) return; - dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 @@ -4743,6 +4760,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) unsigned long double_grace_period_end = nn->boot_time + 2 * nn->nfsd4_lease; + if (nn->track_reclaim_completes && + atomic_read(&nn->nr_reclaim_complete) == + nn->reclaim_str_hashtbl_size) + return false; if (!nn->somebody_reclaimed) return false; nn->somebody_reclaimed = false; @@ -4773,6 +4794,7 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = 0; goto out; } + dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); spin_lock(&nn->client_lock); @@ -7261,10 +7283,19 @@ nfs4_state_start_net(struct net *net) return ret; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); + if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) + goto skip_grace; printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; + +skip_grace: + printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n", + net->ns.inum); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ); + nfsd4_end_grace(nn); + return 0; } /* initialization to perform when the nfsd service is started: */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f2feb2d11bae..6667d3a4f839 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1242,6 +1242,7 @@ static __net_init int nfsd_init_net(struct net *net) nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); nn->s2s_cp_cl_id = nn->clientid_counter++; From 869216075b63c7a4030ee193c079cb5a69a918cb Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 26 Mar 2019 18:06:29 -0400 Subject: [PATCH 06/23] nfsd: re-order client tracking method selection The new order is first nfsdcld, then the UMH upcall, and finally the legacy tracking method. Added some printk's to the tracking initialization functions so it's clear which tracking method was ultimately selected. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 80 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 09e3c9352778..d60001da1dbf 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -628,6 +628,7 @@ nfsd4_legacy_tracking_init(struct net *net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; + printk("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -937,7 +938,7 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) /* Initialize rpc_pipefs pipe for communication with client tracking daemon */ static int -nfsd4_init_cld_pipe(struct net *net) +__nfsd4_init_cld_pipe(struct net *net) { int ret; struct dentry *dentry; @@ -980,6 +981,17 @@ err: return ret; } +static int +nfsd4_init_cld_pipe(struct net *net) +{ + int status; + + status = __nfsd4_init_cld_pipe(net); + if (!status) + printk("NFSD: Using old nfsdcld client tracking operations.\n"); + return status; +} + static void nfsd4_remove_cld_pipe(struct net *net) { @@ -1285,27 +1297,55 @@ nfs4_cld_state_shutdown(struct net *net) kfree(nn->reclaim_str_hashtbl); } +static bool +cld_running(struct nfsd_net *nn) +{ + struct cld_net *cn = nn->cld_net; + struct rpc_pipe *pipe = cn->cn_pipe; + + return pipe->nreaders || pipe->nwriters; +} + static int nfsd4_cld_tracking_init(struct net *net) { int status; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool running; + int retries = 10; status = nfs4_cld_state_init(net); if (status) return status; - status = nfsd4_init_cld_pipe(net); + status = __nfsd4_init_cld_pipe(net); if (status) goto err_shutdown; + /* + * rpc pipe upcalls take 30 seconds to time out, so we don't want to + * queue an upcall unless we know that nfsdcld is running (because we + * want this to fail fast so that nfsd4_client_tracking_init() can try + * the next client tracking method). nfsdcld should already be running + * before nfsd is started, so the wait here is for nfsdcld to open the + * pipefs file we just created. + */ + while (!(running = cld_running(nn)) && retries--) + msleep(100); + + if (!running) { + status = -ETIMEDOUT; + goto err_remove; + } + status = nfsd4_cld_grace_start(nn); if (status) { if (status == -EOPNOTSUPP) printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n"); nfs4_release_reclaim(nn); goto err_remove; - } + } else + printk("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: @@ -1552,6 +1592,8 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); + if (!ret) + printk("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } @@ -1701,9 +1743,20 @@ nfsd4_client_tracking_init(struct net *net) if (nn->client_tracking_ops) goto do_init; + /* First, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + if (status != -ETIMEDOUT) { + nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + } + /* - * First, try a UMH upcall. It should succeed or fail quickly, so - * there's little harm in trying that first. + * Next, try the UMH upcall. */ nn->client_tracking_ops = &nfsd4_umh_tracking_ops; status = nn->client_tracking_ops->init(net); @@ -1711,26 +1764,23 @@ nfsd4_client_tracking_init(struct net *net) return status; /* - * See if the recoverydir exists and is a directory. If it is, - * then use the legacy ops. + * Finally, See if the recoverydir exists and is a directory. + * If it is, then use the legacy ops. */ nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); if (!status) { status = d_is_dir(path.dentry); path_put(&path); - if (status) - goto do_init; + if (!status) { + status = -EINVAL; + goto out; + } } - /* Finally, try to use nfsdcld */ - nn->client_tracking_ops = &nfsd4_cld_tracking_ops; - status = nn->client_tracking_ops->init(net); - if (!status) - return status; - nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0; do_init: status = nn->client_tracking_ops->init(net); +out: if (status) { printk(KERN_WARNING "NFSD: Unable to initialize client " "recovery tracking! (%d)\n", status); From 8a9f4f41248a4dc00a9f0831a50f8b7a2fd86bfc Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 26 Mar 2019 18:06:30 -0400 Subject: [PATCH 07/23] nfsd: handle legacy client tracking records sent by nfsdcld The new nfsdcld will do a one-time "upgrade" where it searches for records from nfsdcltrack and the legacy tracking during startup. Legacy records will be prefixed with the string "hash:", which we need to strip off before adding to the reclaim_str_hashtbl. When legacy records are encountered, set the new cn_has_legacy flag in the cld_net. When this flag is set, if the search for a reclaim record based on the client name string fails, then do a second search based on the hash of the name string. Note that if there are legacy records then the grace period will not be lifted early via the tracking of RECLAIM_COMPLETEs. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index d60001da1dbf..3a123c88120a 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -731,6 +731,7 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; + bool cn_has_legacy; }; struct cld_upcall { @@ -787,6 +788,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg, uint8_t cmd; struct xdr_netobj name; uint16_t namelen; + struct cld_net *cn = nn->cld_net; if (get_user(cmd, &cmsg->cm_cmd)) { dprintk("%s: error when copying cmd from userspace", __func__); @@ -799,6 +801,11 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg, if (IS_ERR_OR_NULL(name.data)) return -EFAULT; name.len = namelen; + if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { + name.len = name.len - 5; + memmove(name.data, name.data + 5, name.len); + cn->cn_has_legacy = true; + } if (!nfs4_client_to_reclaim(name, nn)) { kfree(name.data); return -EFAULT; @@ -969,6 +976,7 @@ __nfsd4_init_cld_pipe(struct net *net) } cn->cn_pipe->dentry = dentry; + cn->cn_has_legacy = false; nn->cld_net = cn; return 0; @@ -1171,6 +1179,10 @@ nfsd4_cld_check(struct nfs4_client *clp) { struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + int status; + char dname[HEXDIR_LEN]; + struct xdr_netobj name; /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -1178,12 +1190,31 @@ nfsd4_cld_check(struct nfs4_client *clp) /* look for it in the reclaim hashtable otherwise */ crp = nfsd4_find_reclaim_client(clp->cl_name, nn); - if (crp) { - crp->cr_clp = clp; - return 0; - } + if (crp) + goto found; + if (cn->cn_has_legacy) { + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return -ENOENT; + + name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL); + if (!name.data) { + dprintk("%s: failed to allocate memory for name.data!\n", + __func__); + return -ENOENT; + } + name.len = HEXDIR_LEN; + crp = nfsd4_find_reclaim_client(name, nn); + kfree(name.data); + if (crp) + goto found; + + } return -ENOENT; +found: + crp->cr_clp = clp; + return 0; } static int From 83dd59a0b9afc3b1a2642fb5c9b0585b1c08768f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:14 -0400 Subject: [PATCH 08/23] SUNRPC/nfs: Fix return value for nfs4_callback_compound() RPC server procedures are normally expected to return a __be32 encoded status value of type 'enum rpc_accept_stat', however at least one function wants to return an authentication status of type 'enum rpc_auth_stat' in the case where authentication fails. This patch adds functionality to allow this. Fixes: a4e187d83d88 ("NFS: Don't drop CB requests with invalid principals") Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfs/callback_xdr.c | 2 +- include/linux/sunrpc/svc.h | 2 ++ net/sunrpc/svc.c | 27 ++++++++++++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 06233bfa6d73..73a5a5ea2976 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -983,7 +983,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) out_invalidcred: pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); - return rpc_autherr_badcred; + return svc_return_autherr(rqstp, rpc_autherr_badcred); } /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e52385340b3b..7ff12c9dbeaf 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -271,6 +271,7 @@ struct svc_rqst { #define RQ_VICTIM (5) /* about to be shut down */ #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ +#define RQ_AUTHERR (8) /* Request status is auth error */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ @@ -504,6 +505,7 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); +__be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbd19697ee38..3d5dd6b86652 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1144,6 +1144,22 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} #endif +__be32 +svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err) +{ + set_bit(RQ_AUTHERR, &rqstp->rq_flags); + return auth_err; +} +EXPORT_SYMBOL_GPL(svc_return_autherr); + +static __be32 +svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) +{ + if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags)) + return *statp; + return rpc_auth_ok; +} + /* * Common routine for processing the RPC request. */ @@ -1290,11 +1306,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) procp->pc_release(rqstp); goto dropit; } - if (*statp == rpc_autherr_badcred) { - if (procp->pc_release) - procp->pc_release(rqstp); - goto err_bad_auth; - } + auth_stat = svc_get_autherr(rqstp, statp); + if (auth_stat != rpc_auth_ok) + goto err_release_bad_auth; if (*statp == rpc_success && procp->pc_encode && !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) { dprintk("svc: failed to encode reply\n"); @@ -1351,6 +1365,9 @@ err_bad_rpc: svc_putnl(resv, 2); goto sendit; +err_release_bad_auth: + if (procp->pc_release) + procp->pc_release(rqstp); err_bad_auth: dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); serv->sv_stats->rpcbadauth++; From 8e5b67731d088e66fc6a59d9e2fc9a5e4e187303 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:15 -0400 Subject: [PATCH 09/23] SUNRPC: Add a callback to initialise server requests Add a callback to help initialise server requests before they are processed. This will allow us to clean up the NFS server version support, and to make it container safe. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 3 +- fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 2 + include/linux/sunrpc/svc.h | 16 ++++++ net/sunrpc/svc.c | 115 +++++++++++++++++++++++++------------ 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 346ed161756d..75415b21efda 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -807,5 +807,6 @@ static struct svc_program nlmsvc_program = { .pg_name = "lockd", /* service name */ .pg_class = "nfsd", /* share authentication with nfsd */ .pg_stats = &nlmsvc_stats, /* stats table */ - .pg_authenticate = &lockd_authenticate /* export authentication */ + .pg_authenticate = &lockd_authenticate, /* export authentication */ + .pg_init_request = svc_generic_init_request, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 0b602a39dd71..a9510374bad7 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -457,4 +457,5 @@ static struct svc_program nfs4_callback_program = { .pg_class = "nfs", /* authentication class */ .pg_stats = &nfs4_callback_stats, .pg_authenticate = nfs_callback_authenticate, + .pg_init_request = svc_generic_init_request, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 89cb484f1cfb..e26762e84798 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -86,6 +86,7 @@ static struct svc_program nfsd_acl_program = { .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, + .pg_init_request = svc_generic_init_request, }; static struct svc_stat nfsd_acl_svcstats = { @@ -118,6 +119,7 @@ struct svc_program nfsd_program = { .pg_class = "nfsd", /* authentication class */ .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ + .pg_init_request = svc_generic_init_request, }; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7ff12c9dbeaf..f43d5765acff 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -383,6 +383,16 @@ struct svc_deferred_req { __be32 args[0]; }; +struct svc_process_info { + union { + int (*dispatch)(struct svc_rqst *, __be32 *); + struct { + unsigned int lovers; + unsigned int hivers; + } mismatch; + }; +}; + /* * List of RPC programs on the same transport endpoint */ @@ -397,6 +407,9 @@ struct svc_program { char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ int (*pg_authenticate)(struct svc_rqst *); + __be32 (*pg_init_request)(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); }; /* @@ -506,6 +519,9 @@ char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); __be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); +__be32 svc_generic_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *procinfo); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 3d5dd6b86652..69f3b9e015ce 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1160,6 +1160,59 @@ svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) return rpc_auth_ok; } +__be32 +svc_generic_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *ret) +{ + const struct svc_version *versp = NULL; /* compiler food */ + const struct svc_procedure *procp = NULL; + + if (rqstp->rq_vers >= progp->pg_nvers ) + goto err_bad_vers; + versp = progp->pg_vers[rqstp->rq_vers]; + if (!versp) + goto err_bad_vers; + + /* + * Some protocol versions (namely NFSv4) require some form of + * congestion control. (See RFC 7530 section 3.1 paragraph 2) + * In other words, UDP is not allowed. We mark those when setting + * up the svc_xprt, and verify that here. + * + * The spec is not very clear about what error should be returned + * when someone tries to access a server that is listening on UDP + * for lower versions. RPC_PROG_MISMATCH seems to be the closest + * fit. + */ + if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && + !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) + goto err_bad_vers; + + if (rqstp->rq_proc >= versp->vs_nproc) + goto err_bad_proc; + rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; + if (!procp) + goto err_bad_proc; + + /* Initialize storage for argp and resp */ + memset(rqstp->rq_argp, 0, procp->pc_argsize); + memset(rqstp->rq_resp, 0, procp->pc_ressize); + + /* Bump per-procedure stats counter */ + versp->vs_count[rqstp->rq_proc]++; + + ret->dispatch = versp->vs_dispatch; + return rpc_success; +err_bad_vers: + ret->mismatch.lovers = progp->pg_lovers; + ret->mismatch.hivers = progp->pg_hivers; + return rpc_prog_mismatch; +err_bad_proc: + return rpc_proc_unavail; +} +EXPORT_SYMBOL_GPL(svc_generic_init_request); + /* * Common routine for processing the RPC request. */ @@ -1167,11 +1220,11 @@ static int svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) { struct svc_program *progp; - const struct svc_version *versp = NULL; /* compiler food */ const struct svc_procedure *procp = NULL; struct svc_serv *serv = rqstp->rq_server; + struct svc_process_info process; __be32 *statp; - u32 prog, vers, proc; + u32 prog, vers; __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -1203,8 +1256,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_putnl(resv, 0); /* ACCEPT */ rqstp->rq_prog = prog = svc_getnl(argv); /* program number */ - rqstp->rq_vers = vers = svc_getnl(argv); /* version number */ - rqstp->rq_proc = proc = svc_getnl(argv); /* procedure number */ + rqstp->rq_vers = svc_getnl(argv); /* version number */ + rqstp->rq_proc = svc_getnl(argv); /* procedure number */ for (progp = serv->sv_program; progp; progp = progp->pg_next) if (prog == progp->pg_prog) @@ -1242,29 +1295,22 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) if (progp == NULL) goto err_bad_prog; - if (vers >= progp->pg_nvers || - !(versp = progp->pg_vers[vers])) + rpc_stat = progp->pg_init_request(rqstp, progp, &process); + switch (rpc_stat) { + case rpc_success: + break; + case rpc_prog_unavail: + goto err_bad_prog; + case rpc_prog_mismatch: goto err_bad_vers; - - /* - * Some protocol versions (namely NFSv4) require some form of - * congestion control. (See RFC 7530 section 3.1 paragraph 2) - * In other words, UDP is not allowed. We mark those when setting - * up the svc_xprt, and verify that here. - * - * The spec is not very clear about what error should be returned - * when someone tries to access a server that is listening on UDP - * for lower versions. RPC_PROG_MISMATCH seems to be the closest - * fit. - */ - if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && - !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) - goto err_bad_vers; - - procp = versp->vs_proc + proc; - if (proc >= versp->vs_nproc || !procp->pc_func) + case rpc_proc_unavail: + goto err_bad_proc; + } + + procp = rqstp->rq_procinfo; + /* Should this check go into the dispatcher? */ + if (!procp || !procp->pc_func) goto err_bad_proc; - rqstp->rq_procinfo = procp; /* Syntactic check complete */ serv->sv_stats->rpccnt++; @@ -1274,13 +1320,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) statp = resv->iov_base +resv->iov_len; svc_putnl(resv, RPC_SUCCESS); - /* Bump per-procedure stats counter */ - versp->vs_count[proc]++; - - /* Initialize storage for argp and resp */ - memset(rqstp->rq_argp, 0, procp->pc_argsize); - memset(rqstp->rq_resp, 0, procp->pc_ressize); - /* un-reserve some of the out-queue now that we have a * better idea of reply size */ @@ -1288,7 +1327,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. */ - if (!versp->vs_dispatch) { + if (!process.dispatch) { /* * Decode arguments * XXX: why do we ignore the return value? @@ -1317,7 +1356,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) } } else { dprintk("svc: calling dispatcher\n"); - if (!versp->vs_dispatch(rqstp, statp)) { + if (!process.dispatch(rqstp, statp)) { /* Release reply info */ if (procp->pc_release) procp->pc_release(rqstp); @@ -1386,16 +1425,16 @@ err_bad_prog: err_bad_vers: svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", - vers, prog, progp->pg_name); + rqstp->rq_vers, rqstp->rq_prog, progp->pg_name); serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROG_MISMATCH); - svc_putnl(resv, progp->pg_lovers); - svc_putnl(resv, progp->pg_hivers); + svc_putnl(resv, process.mismatch.lovers); + svc_putnl(resv, process.mismatch.hivers); goto sendit; err_bad_proc: - svc_printk(rqstp, "unknown procedure (%d)\n", proc); + svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc); serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROC_UNAVAIL); From 4532608d71c8ed6049c949a667eeed719cb9291d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:16 -0400 Subject: [PATCH 10/23] SUNRPC: Clean up generic dispatcher code Simplify the generic server dispatcher. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 75 ++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 69f3b9e015ce..791c8076793f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1160,6 +1160,45 @@ svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) return rpc_auth_ok; } +static int +svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + const struct svc_procedure *procp = rqstp->rq_procinfo; + + /* + * Decode arguments + * XXX: why do we ignore the return value? + */ + if (procp->pc_decode && + !procp->pc_decode(rqstp, argv->iov_base)) { + *statp = rpc_garbage_args; + return 1; + } + + *statp = procp->pc_func(rqstp); + + if (*statp == rpc_drop_reply || + test_bit(RQ_DROPME, &rqstp->rq_flags)) + return 0; + + if (test_bit(RQ_AUTHERR, &rqstp->rq_flags)) + return 1; + + if (*statp != rpc_success) + return 1; + + /* Encode reply */ + if (procp->pc_encode && + !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) { + dprintk("svc: failed to encode reply\n"); + /* serv->sv_stats->rpcsystemerr++; */ + *statp = rpc_system_err; + } + return 1; +} + __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, @@ -1328,40 +1367,17 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) /* Call the function that processes the request. */ if (!process.dispatch) { - /* - * Decode arguments - * XXX: why do we ignore the return value? - */ - if (procp->pc_decode && - !procp->pc_decode(rqstp, argv->iov_base)) + if (!svc_generic_dispatch(rqstp, statp)) + goto release_dropit; + if (*statp == rpc_garbage_args) goto err_garbage; - - *statp = procp->pc_func(rqstp); - - /* Encode reply */ - if (*statp == rpc_drop_reply || - test_bit(RQ_DROPME, &rqstp->rq_flags)) { - if (procp->pc_release) - procp->pc_release(rqstp); - goto dropit; - } auth_stat = svc_get_autherr(rqstp, statp); if (auth_stat != rpc_auth_ok) goto err_release_bad_auth; - if (*statp == rpc_success && procp->pc_encode && - !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) { - dprintk("svc: failed to encode reply\n"); - /* serv->sv_stats->rpcsystemerr++; */ - *statp = rpc_system_err; - } } else { dprintk("svc: calling dispatcher\n"); - if (!process.dispatch(rqstp, statp)) { - /* Release reply info */ - if (procp->pc_release) - procp->pc_release(rqstp); - goto dropit; - } + if (!process.dispatch(rqstp, statp)) + goto release_dropit; /* Release reply info */ } /* Check RPC status result */ @@ -1380,6 +1396,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto close; return 1; /* Caller can now send it */ +release_dropit: + if (procp->pc_release) + procp->pc_release(rqstp); dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ dprintk("svc: svc_process dropit\n"); From 642ee6b209c2f4c20fe0a7ed36b429c470162eae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:17 -0400 Subject: [PATCH 11/23] SUNRPC: Allow further customisation of RPC program registration Add a callback to allow customisation of the rpcbind registration. When clients have the ability to turn on and off version support, we want to allow them to also prevent registration of those versions with the rpc portmapper. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 1 + fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 3 +- include/linux/sunrpc/svc.h | 15 +++++++ net/sunrpc/svc.c | 85 ++++++++++++++++++++++++-------------- 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 75415b21efda..96bb74c919f9 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -809,4 +809,5 @@ static struct svc_program nlmsvc_program = { .pg_stats = &nlmsvc_stats, /* stats table */ .pg_authenticate = &lockd_authenticate, /* export authentication */ .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a9510374bad7..15c9575e0e7a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -458,4 +458,5 @@ static struct svc_program nfs4_callback_program = { .pg_stats = &nfs4_callback_stats, .pg_authenticate = nfs_callback_authenticate, .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e26762e84798..6a52400c85e0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -87,6 +87,7 @@ static struct svc_program nfsd_acl_program = { .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, }; static struct svc_stat nfsd_acl_svcstats = { @@ -120,7 +121,7 @@ struct svc_program nfsd_program = { .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ .pg_init_request = svc_generic_init_request, - + .pg_rpcbind_set = svc_generic_rpcbind_set, }; static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f43d5765acff..1afe38eb33f7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -410,6 +410,11 @@ struct svc_program { __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, struct svc_process_info *); + int (*pg_rpcbind_set)(struct net *net, + const struct svc_program *, + u32 version, int family, + unsigned short proto, + unsigned short port); }; /* @@ -522,6 +527,16 @@ __be32 svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *procinfo); +int svc_generic_rpcbind_set(struct net *net, + const struct svc_program *progp, + u32 version, int family, + unsigned short proto, + unsigned short port); +int svc_rpcbind_set_version(struct net *net, + const struct svc_program *progp, + u32 version, int family, + unsigned short proto, + unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 791c8076793f..2be827820247 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -993,6 +993,58 @@ static int __svc_register(struct net *net, const char *progname, return error; } +int svc_rpcbind_set_version(struct net *net, + const struct svc_program *progp, + u32 version, int family, + unsigned short proto, + unsigned short port) +{ + dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n", + progp->pg_name, version, + proto == IPPROTO_UDP? "udp" : "tcp", + port, family); + + return __svc_register(net, progp->pg_name, progp->pg_prog, + version, family, proto, port); + +} +EXPORT_SYMBOL_GPL(svc_rpcbind_set_version); + +int svc_generic_rpcbind_set(struct net *net, + const struct svc_program *progp, + u32 version, int family, + unsigned short proto, + unsigned short port) +{ + const struct svc_version *vers = progp->pg_vers[version]; + int error; + + if (vers == NULL) + return 0; + + if (vers->vs_hidden) { + dprintk("svc: svc_register(%sv%d, %s, %u, %u)" + " (but not telling portmap)\n", + progp->pg_name, version, + proto == IPPROTO_UDP? "udp" : "tcp", + port, family); + return 0; + } + + /* + * Don't register a UDP port if we need congestion + * control. + */ + if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP) + return 0; + + error = svc_rpcbind_set_version(net, progp, version, + family, proto, port); + + return (vers->vs_rpcb_optnl) ? 0 : error; +} +EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set); + /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register @@ -1008,7 +1060,6 @@ int svc_register(const struct svc_serv *serv, struct net *net, const unsigned short port) { struct svc_program *progp; - const struct svc_version *vers; unsigned int i; int error = 0; @@ -1018,37 +1069,9 @@ int svc_register(const struct svc_serv *serv, struct net *net, for (progp = serv->sv_program; progp; progp = progp->pg_next) { for (i = 0; i < progp->pg_nvers; i++) { - vers = progp->pg_vers[i]; - if (vers == NULL) - continue; - - dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", - progp->pg_name, - i, - proto == IPPROTO_UDP? "udp" : "tcp", - port, - family, - vers->vs_hidden ? - " (but not telling portmap)" : ""); - - if (vers->vs_hidden) - continue; - - /* - * Don't register a UDP port if we need congestion - * control. - */ - if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP) - continue; - - error = __svc_register(net, progp->pg_name, progp->pg_prog, - i, family, proto, port); - - if (vers->vs_rpcb_optnl) { - error = 0; - continue; - } + error = progp->pg_rpcbind_set(net, progp, i, + family, proto, port); if (error < 0) { printk(KERN_WARNING "svc: failed to register " "%sv%u RPC service (errno %d).\n", From 029be5d03357f0797d1b9a6b5aab5e7e311fd76a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:18 -0400 Subject: [PATCH 12/23] nfsd: Add custom rpcbind callbacks for knfsd Add custom rpcbind callbacks in preparation for the knfsd per-container version feature. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6a52400c85e0..378edcfe9701 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -32,6 +32,18 @@ extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static int nfsd_acl_rpcbind_set(struct net *, + const struct svc_program *, + u32, int, + unsigned short, + unsigned short); +#endif +static int nfsd_rpcbind_set(struct net *, + const struct svc_program *, + u32, int, + unsigned short, + unsigned short); /* * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members @@ -87,7 +99,7 @@ static struct svc_program nfsd_acl_program = { .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, .pg_init_request = svc_generic_init_request, - .pg_rpcbind_set = svc_generic_rpcbind_set, + .pg_rpcbind_set = nfsd_acl_rpcbind_set, }; static struct svc_stat nfsd_acl_svcstats = { @@ -121,7 +133,7 @@ struct svc_program nfsd_program = { .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ .pg_init_request = svc_generic_init_request, - .pg_rpcbind_set = svc_generic_rpcbind_set, + .pg_rpcbind_set = nfsd_rpcbind_set, }; static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { @@ -670,6 +682,38 @@ out: return error; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static bool +nfsd_support_acl_version(int vers) +{ + if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS) + return nfsd_acl_version[vers] != NULL; + return false; +} + +static int +nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp, + u32 version, int family, unsigned short proto, + unsigned short port) +{ + if (!nfsd_support_acl_version(version) || + !nfsd_vers(version, NFSD_TEST)) + return 0; + return svc_generic_rpcbind_set(net, progp, version, family, + proto, port); +} +#endif + +static int +nfsd_rpcbind_set(struct net *net, const struct svc_program *progp, + u32 version, int family, unsigned short proto, + unsigned short port) +{ + if (!nfsd_vers(version, NFSD_TEST)) + return 0; + return svc_generic_rpcbind_set(net, progp, version, family, + proto, port); +} /* * This is the NFS server kernel thread From e333f3bbefe3e25e2bcff58063f920a2eaba224b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 11:46:19 -0400 Subject: [PATCH 13/23] nfsd: Allow containers to set supported nfs versions Support use of the --nfs-version/--no-nfs-version arguments to rpc.nfsd in containers. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 8 ++ fs/nfsd/nfs4proc.c | 3 +- fs/nfsd/nfsctl.c | 25 +++--- fs/nfsd/nfsd.h | 8 +- fs/nfsd/nfssvc.c | 214 +++++++++++++++++++++++++++++++++++---------- 5 files changed, 197 insertions(+), 61 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index b42aaa22fba2..789abc4dd1d2 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -134,10 +134,18 @@ struct nfsd_net { u32 s2s_cp_cl_id; struct idr s2s_cp_stateids; spinlock_t s2s_cp_lock; + + /* + * Version information + */ + bool *nfsd_versions; + bool *nfsd4_minorversions; }; /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) +extern void nfsd_netns_free_versions(struct nfsd_net *nn); + extern unsigned int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0cfd257ffdaf..b0ad72d701b1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1926,6 +1926,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; svcxdr_init_encode(rqstp, resp); @@ -1948,7 +1949,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) + if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) goto out; status = nfserr_resource; if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6667d3a4f839..630d629090be 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -537,14 +537,14 @@ out_free: } static ssize_t -nfsd_print_version_support(char *buf, int remaining, const char *sep, - unsigned vers, int minor) +nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining, + const char *sep, unsigned vers, int minor) { const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; - bool supported = !!nfsd_vers(vers, NFSD_TEST); + bool supported = !!nfsd_vers(nn, vers, NFSD_TEST); if (vers == 4 && minor >= 0 && - !nfsd_minorversion(minor, NFSD_TEST)) + !nfsd_minorversion(nn, minor, NFSD_TEST)) supported = false; if (minor == 0 && supported) /* @@ -599,20 +599,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) switch(num) { case 2: case 3: - nfsd_vers(num, cmd); + nfsd_vers(nn, num, cmd); break; case 4: if (*minorp == '.') { - if (nfsd_minorversion(minor, cmd) < 0) + if (nfsd_minorversion(nn, minor, cmd) < 0) return -EINVAL; - } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) { + } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) { /* * Either we have +4 and no minors are enabled, * or we have -4 and at least one minor is enabled. * In either case, propagate 'cmd' to all minors. */ minor = 0; - while (nfsd_minorversion(minor, cmd) >= 0) + while (nfsd_minorversion(nn, minor, cmd) >= 0) minor++; } break; @@ -624,7 +624,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) /* If all get turned off, turn them back on, as * having no versions is BAD */ - nfsd_reset_versions(); + nfsd_reset_versions(nn); } /* Now write current state into reply buffer */ @@ -633,12 +633,12 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { int minor; - if (!nfsd_vers(num, NFSD_AVAIL)) + if (!nfsd_vers(nn, num, NFSD_AVAIL)) continue; minor = -1; do { - len = nfsd_print_version_support(buf, remaining, + len = nfsd_print_version_support(nn, buf, remaining, sep, num, minor); if (len >= remaining) goto out; @@ -1239,6 +1239,8 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; @@ -1261,6 +1263,7 @@ static __net_exit void nfsd_exit_net(struct net *net) { nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); + nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 066899929863..6bae2554b2b2 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -98,10 +98,12 @@ extern const struct svc_version nfsd_acl_version3; #endif #endif +struct nfsd_net; + enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; -int nfsd_vers(int vers, enum vers_op change); -int nfsd_minorversion(u32 minorversion, enum vers_op change); -void nfsd_reset_versions(void); +int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); +int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); +void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); extern int nfsd_max_blksize; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 378edcfe9701..520757774614 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -38,12 +38,18 @@ static int nfsd_acl_rpcbind_set(struct net *, u32, int, unsigned short, unsigned short); +static __be32 nfsd_acl_init_request(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); #endif static int nfsd_rpcbind_set(struct net *, const struct svc_program *, u32, int, unsigned short, unsigned short); +static __be32 nfsd_init_request(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); /* * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members @@ -98,7 +104,7 @@ static struct svc_program nfsd_acl_program = { .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, .pg_authenticate = &svc_set_client, - .pg_init_request = svc_generic_init_request, + .pg_init_request = nfsd_acl_init_request, .pg_rpcbind_set = nfsd_acl_rpcbind_set, }; @@ -119,7 +125,6 @@ static const struct svc_version *nfsd_version[] = { #define NFSD_MINVERS 2 #define NFSD_NRVERS ARRAY_SIZE(nfsd_version) -static const struct svc_version *nfsd_versions[NFSD_NRVERS]; struct svc_program nfsd_program = { #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) @@ -127,78 +132,136 @@ struct svc_program nfsd_program = { #endif .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ - .pg_vers = nfsd_versions, /* version table */ + .pg_vers = nfsd_version, /* version table */ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ .pg_stats = &nfsd_svcstats, /* version table */ .pg_authenticate = &svc_set_client, /* export authentication */ - .pg_init_request = svc_generic_init_request, + .pg_init_request = nfsd_init_request, .pg_rpcbind_set = nfsd_rpcbind_set, }; -static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { - [0] = 1, - [1] = 1, - [2] = 1, -}; +static bool +nfsd_support_version(int vers) +{ + if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS) + return nfsd_version[vers] != NULL; + return false; +} -int nfsd_vers(int vers, enum vers_op change) +static bool * +nfsd_alloc_versions(void) +{ + bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL); + unsigned i; + + if (vers) { + /* All compiled versions are enabled by default */ + for (i = 0; i < NFSD_NRVERS; i++) + vers[i] = nfsd_support_version(i); + } + return vers; +} + +static bool * +nfsd_alloc_minorversions(void) +{ + bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1, + sizeof(bool), GFP_KERNEL); + unsigned i; + + if (vers) { + /* All minor versions are enabled by default */ + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) + vers[i] = nfsd_support_version(4); + } + return vers; +} + +void +nfsd_netns_free_versions(struct nfsd_net *nn) +{ + kfree(nn->nfsd_versions); + kfree(nn->nfsd4_minorversions); + nn->nfsd_versions = NULL; + nn->nfsd4_minorversions = NULL; +} + +static void +nfsd_netns_init_versions(struct nfsd_net *nn) +{ + if (!nn->nfsd_versions) { + nn->nfsd_versions = nfsd_alloc_versions(); + nn->nfsd4_minorversions = nfsd_alloc_minorversions(); + if (!nn->nfsd_versions || !nn->nfsd4_minorversions) + nfsd_netns_free_versions(nn); + } +} + +int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) return 0; switch(change) { case NFSD_SET: - nfsd_versions[vers] = nfsd_version[vers]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - if (vers < NFSD_ACL_NRVERS) - nfsd_acl_versions[vers] = nfsd_acl_version[vers]; -#endif + if (nn->nfsd_versions) + nn->nfsd_versions[vers] = nfsd_support_version(vers); break; case NFSD_CLEAR: - nfsd_versions[vers] = NULL; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - if (vers < NFSD_ACL_NRVERS) - nfsd_acl_versions[vers] = NULL; -#endif + nfsd_netns_init_versions(nn); + if (nn->nfsd_versions) + nn->nfsd_versions[vers] = false; break; case NFSD_TEST: - return nfsd_versions[vers] != NULL; + if (nn->nfsd_versions) + return nn->nfsd_versions[vers]; + /* Fallthrough */ case NFSD_AVAIL: - return nfsd_version[vers] != NULL; + return nfsd_support_version(vers); } return 0; } static void -nfsd_adjust_nfsd_versions4(void) +nfsd_adjust_nfsd_versions4(struct nfsd_net *nn) { unsigned i; for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) { - if (nfsd_supported_minorversions[i]) + if (nn->nfsd4_minorversions[i]) return; } - nfsd_vers(4, NFSD_CLEAR); + nfsd_vers(nn, 4, NFSD_CLEAR); } -int nfsd_minorversion(u32 minorversion, enum vers_op change) +int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change) { if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && change != NFSD_AVAIL) return -1; + switch(change) { case NFSD_SET: - nfsd_supported_minorversions[minorversion] = true; - nfsd_vers(4, NFSD_SET); + if (nn->nfsd4_minorversions) { + nfsd_vers(nn, 4, NFSD_SET); + nn->nfsd4_minorversions[minorversion] = + nfsd_vers(nn, 4, NFSD_TEST); + } break; case NFSD_CLEAR: - nfsd_supported_minorversions[minorversion] = false; - nfsd_adjust_nfsd_versions4(); + nfsd_netns_init_versions(nn); + if (nn->nfsd4_minorversions) { + nn->nfsd4_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(nn); + } break; case NFSD_TEST: - return nfsd_supported_minorversions[minorversion]; + if (nn->nfsd4_minorversions) + return nn->nfsd4_minorversions[minorversion]; + return nfsd_vers(nn, 4, NFSD_TEST); case NFSD_AVAIL: - return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION && + nfsd_vers(nn, 4, NFSD_AVAIL); } return 0; } @@ -280,13 +343,9 @@ static void nfsd_shutdown_generic(void) nfsd_racache_shutdown(); } -static bool nfsd_needs_lockd(void) +static bool nfsd_needs_lockd(struct nfsd_net *nn) { -#if defined(CONFIG_NFSD_V3) - return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); -#else - return (nfsd_versions[2] != NULL); -#endif + return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } static int nfsd_startup_net(int nrservs, struct net *net) @@ -304,7 +363,7 @@ static int nfsd_startup_net(int nrservs, struct net *net) if (ret) goto out_socks; - if (nfsd_needs_lockd() && !nn->lockd_up) { + if (nfsd_needs_lockd(nn) && !nn->lockd_up) { ret = lockd_up(net); if (ret) goto out_socks; @@ -437,20 +496,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) nfsd_export_flush(net); } -void nfsd_reset_versions(void) +void nfsd_reset_versions(struct nfsd_net *nn) { int i; for (i = 0; i < NFSD_NRVERS; i++) - if (nfsd_vers(i, NFSD_TEST)) + if (nfsd_vers(nn, i, NFSD_TEST)) return; for (i = 0; i < NFSD_NRVERS; i++) if (i != 4) - nfsd_vers(i, NFSD_SET); + nfsd_vers(nn, i, NFSD_SET); else { int minor = 0; - while (nfsd_minorversion(minor, NFSD_SET) >= 0) + while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0) minor++; } } @@ -518,7 +577,7 @@ int nfsd_create_serv(struct net *net) } if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); - nfsd_reset_versions(); + nfsd_reset_versions(nn); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) @@ -697,11 +756,44 @@ nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp, unsigned short port) { if (!nfsd_support_acl_version(version) || - !nfsd_vers(version, NFSD_TEST)) + !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) return 0; return svc_generic_rpcbind_set(net, progp, version, family, proto, port); } + +static __be32 +nfsd_acl_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *ret) +{ + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + int i; + + if (likely(nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) + return svc_generic_init_request(rqstp, progp, ret); + + ret->mismatch.lovers = NFSD_ACL_NRVERS; + for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) { + if (nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.lovers = i; + break; + } + } + if (ret->mismatch.lovers == NFSD_ACL_NRVERS) + return rpc_prog_unavail; + ret->mismatch.hivers = NFSD_ACL_MINVERS; + for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) { + if (nfsd_support_acl_version(rqstp->rq_vers) && + nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.hivers = i; + break; + } + } + return rpc_prog_mismatch; +} #endif static int @@ -709,12 +801,42 @@ nfsd_rpcbind_set(struct net *net, const struct svc_program *progp, u32 version, int family, unsigned short proto, unsigned short port) { - if (!nfsd_vers(version, NFSD_TEST)) + if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST)) return 0; return svc_generic_rpcbind_set(net, progp, version, family, proto, port); } +static __be32 +nfsd_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *ret) +{ + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + int i; + + if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) + return svc_generic_init_request(rqstp, progp, ret); + + ret->mismatch.lovers = NFSD_NRVERS; + for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { + if (nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.lovers = i; + break; + } + } + if (ret->mismatch.lovers == NFSD_NRVERS) + return rpc_prog_unavail; + ret->mismatch.hivers = NFSD_MINVERS; + for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) { + if (nfsd_vers(nn, i, NFSD_TEST)) { + ret->mismatch.hivers = i; + break; + } + } + return rpc_prog_mismatch; +} + /* * This is the NFS server kernel thread */ From 4df493a260d4c1844437b28009013d5dc408d0df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:37 -0400 Subject: [PATCH 14/23] SUNRPC: Cache the process user cred in the RPC server listener In order to be able to interpret uids and gids correctly in knfsd, we should cache the user namespace of the process that created the RPC server's listener. To do so, we refcount the credential of that process. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 3 ++- fs/nfs/callback.c | 7 +++++-- fs/nfsd/nfsctl.c | 16 ++++++++-------- fs/nfsd/nfsd.h | 2 +- fs/nfsd/nfssvc.c | 14 +++++++------- include/linux/sunrpc/svc_xprt.h | 4 +++- include/linux/sunrpc/svcsock.h | 3 ++- net/sunrpc/svc_xprt.c | 12 ++++++++---- net/sunrpc/svcsock.c | 4 +++- 9 files changed, 39 insertions(+), 26 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 96bb74c919f9..82aa0f35e43d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -190,12 +190,13 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, struct net *net, const int family, const unsigned short port) { + const struct cred *cred = current_cred(); struct svc_xprt *xprt; xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) return svc_create_xprt(serv, name, net, family, port, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 15c9575e0e7a..7817ad94a6ba 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -41,11 +41,13 @@ static struct svc_program nfs4_callback_program; static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) { + const struct cred *cred = current_cred(); int ret; struct nfs_net *nn = net_generic(net, nfs_net_id); ret = svc_create_xprt(serv, "tcp", net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; @@ -53,7 +55,8 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) nn->nfs_callback_tcpport, PF_INET, net->ns.inum); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 630d629090be..90972e1fd785 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -439,7 +439,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(newthreads, net); + rv = nfsd_svc(newthreads, net, file->f_cred); if (rv < 0) return rv; } else @@ -717,7 +717,7 @@ static ssize_t __write_ports_names(char *buf, struct net *net) * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ -static ssize_t __write_ports_addfd(char *buf, struct net *net) +static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred) { char *mesg = buf; int fd, err; @@ -736,7 +736,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0) return err; - err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); if (err < 0) { nfsd_destroy(net); return err; @@ -751,7 +751,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) * A transport listener is added by writing it's transport name and * a port number. */ -static ssize_t __write_ports_addxprt(char *buf, struct net *net) +static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred) { char transport[16]; struct svc_xprt *xprt; @@ -769,12 +769,12 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net) return err; err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS); + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS); + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; @@ -799,10 +799,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, return __write_ports_names(buf, net); if (isdigit(buf[0])) - return __write_ports_addfd(buf, net); + return __write_ports_addfd(buf, net, file->f_cred); if (isalpha(buf[0])) - return __write_ports_addxprt(buf, net); + return __write_ports_addxprt(buf, net, file->f_cred); return -EINVAL; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 6bae2554b2b2..d200c8680259 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -73,7 +73,7 @@ extern const struct seq_operations nfs_exports_op; /* * Function prototypes. */ -int nfsd_svc(int nrservs, struct net *net); +int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); int nfsd_nrthreads(struct net *); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 520757774614..32e4d8a334e3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -283,7 +283,7 @@ int nfsd_nrthreads(struct net *net) return rv; } -static int nfsd_init_socks(struct net *net) +static int nfsd_init_socks(struct net *net, const struct cred *cred) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -292,12 +292,12 @@ static int nfsd_init_socks(struct net *net) return 0; error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS); + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; @@ -348,7 +348,7 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -static int nfsd_startup_net(int nrservs, struct net *net) +static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -359,7 +359,7 @@ static int nfsd_startup_net(int nrservs, struct net *net) ret = nfsd_startup_generic(nrservs); if (ret) return ret; - ret = nfsd_init_socks(net); + ret = nfsd_init_socks(net, cred); if (ret) goto out_socks; @@ -697,7 +697,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) * this is the first time nrservs is nonzero. */ int -nfsd_svc(int nrservs, struct net *net) +nfsd_svc(int nrservs, struct net *net, const struct cred *cred) { int error; bool nfsd_up_before; @@ -719,7 +719,7 @@ nfsd_svc(int nrservs, struct net *net) nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup_net(nrservs, net); + error = nfsd_startup_net(nrservs, net, cred); if (error) goto out_destroy; error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b3f9577e17d6..ea6f46be9cb7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -86,6 +86,7 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; @@ -119,7 +120,8 @@ void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int); + const int, const unsigned short, int, + const struct cred *); void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 119718a922f2..771baadaee9d 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -59,7 +59,8 @@ void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); bool svc_alien_sock(struct net *net, int fd); int svc_addsock(struct svc_serv *serv, const int fd, - char *name_return, const size_t len); + char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 61530b1b7754..b76ee83d4d43 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -136,6 +136,7 @@ static void svc_xprt_free(struct kref *kref) struct module *owner = xprt->xpt_class->xcl_owner; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); + put_cred(xprt->xpt_cred); put_net(xprt->xpt_net); /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) @@ -252,7 +253,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, - const unsigned short port, int flags) + const unsigned short port, int flags, + const struct cred *cred) { struct svc_xprt_class *xcl; @@ -273,6 +275,7 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, module_put(xcl->xcl_owner); return PTR_ERR(newxprt); } + newxprt->xpt_cred = get_cred(cred); svc_add_new_perm_xprt(serv, newxprt); newport = svc_xprt_local_port(newxprt); return newport; @@ -286,15 +289,16 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, - const unsigned short port, int flags) + const unsigned short port, int flags, + const struct cred *cred) { int err; dprintk("svc: creating transport %s[%d]\n", xprt_name, port); - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags); + err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); - err = _svc_create_xprt(serv, xprt_name, net, family, port, flags); + err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); } if (err < 0) dprintk("svc: transport %s not found, err %d\n", diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 43590a968b73..540fde2804d0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1332,13 +1332,14 @@ EXPORT_SYMBOL_GPL(svc_alien_sock); * @fd: file descriptor of the new listener * @name_return: pointer to buffer to fill in with name of listener * @len: size of the buffer + * @cred: credential * * Fills in socket name and returns positive length of name if successful. * Name is terminated with '\n'. On error, returns a negative errno * value. */ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, - const size_t len) + const size_t len, const struct cred *cred) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1371,6 +1372,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, salen = kernel_getsockname(svsk->sk_sock, sin); if (salen >= 0) svc_xprt_set_local(&svsk->sk_xprt, sin, salen); + svsk->sk_xprt.xpt_cred = get_cred(cred); svc_add_new_perm_xprt(serv, &svsk->sk_xprt); return svc_one_sock_name(svsk, name_return, len); out: From 1237d3545c6715edfd09b94d27585069cd68274c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:38 -0400 Subject: [PATCH 15/23] SUNRPC: Temporary sockets should inherit the cred from their parent Temporary sockets should inherit the credential (and hence the user namespace) from the parent listener transport. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b76ee83d4d43..9429b28e9ba0 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -786,9 +786,10 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) __module_get(xprt->xpt_class->xcl_owner); svc_check_conn_limits(xprt->xpt_server); newxpt = xprt->xpt_ops->xpo_accept(xprt); - if (newxpt) + if (newxpt) { + newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); - else + } else module_put(xprt->xpt_class->xcl_owner); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ From 40373b125de6bab186e71d5ea5498bb2b845398b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:39 -0400 Subject: [PATCH 16/23] lockd: Pass the user cred from knfsd when starting the lockd server When starting up a new knfsd server, pass the user cred to the supporting lockd server. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 4 ++-- fs/lockd/svc.c | 28 ++++++++++++++++------------ fs/nfs/client.c | 1 + fs/nfsd/nfssvc.c | 2 +- include/linux/lockd/bind.h | 3 ++- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index c2a128678e6e..d61b72b693cf 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -56,7 +56,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; int status; - status = lockd_up(nlm_init->net); + status = lockd_up(nlm_init->net, nlm_init->cred); if (status < 0) return ERR_PTR(status); @@ -241,7 +241,7 @@ reclaimer(void *ptr) allow_signal(SIGKILL); down_write(&host->h_rwsem); - lockd_up(net); /* note: this cannot fail as lockd is already running */ + lockd_up(net, NULL); /* note: this cannot fail as lockd is already running */ dprintk("lockd: reclaiming locks for host %s\n", host->h_name); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 82aa0f35e43d..3056f3a0c270 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -188,9 +188,9 @@ lockd(void *vrqstp) static int create_lockd_listener(struct svc_serv *serv, const char *name, struct net *net, const int family, - const unsigned short port) + const unsigned short port, + const struct cred *cred) { - const struct cred *cred = current_cred(); struct svc_xprt *xprt; xprt = svc_find_xprt(serv, name, net, family, 0); @@ -202,15 +202,17 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, } static int create_lockd_family(struct svc_serv *serv, struct net *net, - const int family) + const int family, const struct cred *cred) { int err; - err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); + err = create_lockd_listener(serv, "udp", net, family, nlm_udpport, + cred); if (err < 0) return err; - return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); + return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport, + cred); } /* @@ -223,16 +225,17 @@ static int create_lockd_family(struct svc_serv *serv, struct net *net, * Returns zero if all listeners are available; otherwise a * negative errno value is returned. */ -static int make_socks(struct svc_serv *serv, struct net *net) +static int make_socks(struct svc_serv *serv, struct net *net, + const struct cred *cred) { static int warned; int err; - err = create_lockd_family(serv, net, PF_INET); + err = create_lockd_family(serv, net, PF_INET, cred); if (err < 0) goto out_err; - err = create_lockd_family(serv, net, PF_INET6); + err = create_lockd_family(serv, net, PF_INET6, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; @@ -247,7 +250,8 @@ out_err: return err; } -static int lockd_up_net(struct svc_serv *serv, struct net *net) +static int lockd_up_net(struct svc_serv *serv, struct net *net, + const struct cred *cred) { struct lockd_net *ln = net_generic(net, lockd_net_id); int error; @@ -259,7 +263,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) if (error) goto err_bind; - error = make_socks(serv, net); + error = make_socks(serv, net, cred); if (error < 0) goto err_bind; set_grace_period(net); @@ -462,7 +466,7 @@ static struct svc_serv *lockd_create_svc(void) /* * Bring up the lockd process if it's not already up. */ -int lockd_up(struct net *net) +int lockd_up(struct net *net, const struct cred *cred) { struct svc_serv *serv; int error; @@ -475,7 +479,7 @@ int lockd_up(struct net *net) goto err_create; } - error = lockd_up_net(serv, net); + error = lockd_up_net(serv, net, cred); if (error < 0) { lockd_unregister_notifiers(); goto err_put; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 90d71fda65ce..92d07d99829d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -552,6 +552,7 @@ static int nfs_start_lockd(struct nfs_server *server) 1 : 0, .net = clp->cl_net, .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, + .cred = current_cred(), }; if (nlm_init.nfs_version > 3) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 32e4d8a334e3..18d94ea984ba 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -364,7 +364,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre goto out_socks; if (nfsd_needs_lockd(nn) && !nn->lockd_up) { - ret = lockd_up(net); + ret = lockd_up(net, cred); if (ret) goto out_socks; nn->lockd_up = 1; diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 053a4ef3d431..0520c0cd73f4 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -46,6 +46,7 @@ struct nlmclnt_initdata { int noresvport; struct net *net; const struct nlmclnt_operations *nlmclnt_ops; + const struct cred *cred; }; /* @@ -75,7 +76,7 @@ struct nlmclnt_operations { }; extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data); -extern int lockd_up(struct net *net); +extern int lockd_up(struct net *net, const struct cred *cred); extern void lockd_down(struct net *net); #endif /* LINUX_LOCKD_BIND_H */ From ccfe51a5161c17d24157c08f9963f351a89268fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:40 -0400 Subject: [PATCH 17/23] SUNRPC: Fix the server AUTH_UNIX userspace mappings gid_parse() is part of a downcall, so uids and gids should be assumed encoded using the current user namespace. svcauth_unix_accept() is, on the other hand, decoding uids and gids from the wire, so we assume those are encoded to match the user namespace of the server process. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index fb9041b92f72..f92ef79c8ea5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -500,7 +500,7 @@ static int unix_gid_parse(struct cache_detail *cd, rv = get_int(&mesg, &id); if (rv) return -EINVAL; - uid = make_kuid(&init_user_ns, id); + uid = make_kuid(current_user_ns(), id); ug.uid = uid; expiry = get_expiry(&mesg); @@ -522,7 +522,7 @@ static int unix_gid_parse(struct cache_detail *cd, err = -EINVAL; if (rv) goto out; - kgid = make_kgid(&init_user_ns, gid); + kgid = make_kgid(current_user_ns(), gid); if (!gid_valid(kgid)) goto out; ug.gi->gid[i] = kgid; @@ -555,7 +555,7 @@ static int unix_gid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { - struct user_namespace *user_ns = &init_user_ns; + struct user_namespace *user_ns = m->file->f_cred->user_ns; struct unix_gid *ug; int i; int glen; @@ -796,6 +796,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; struct svc_cred *cred = &rqstp->rq_cred; + struct user_namespace *userns; u32 slen, i; int len = argv->iov_len; @@ -816,8 +817,10 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) * (export-specific) anonymous id by nfsd_setuser. * Supplementary gid's will be left alone. */ - cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ - cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ + userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ? + rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns; + cred->cr_uid = make_kuid(userns, svc_getnl(argv)); /* uid */ + cred->cr_gid = make_kgid(userns, svc_getnl(argv)); /* gid */ slen = svc_getnl(argv); /* gids length */ if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0) goto badcred; @@ -825,7 +828,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) if (cred->cr_group_info == NULL) return SVC_CLOSE; for (i = 0; i < slen; i++) { - kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); + kgid_t kgid = make_kgid(userns, svc_getnl(argv)); cred->cr_group_info->gid[i] = kgid; } groups_sort(cred->cr_group_info); From e6667c73a27d80078f8d7fca516d6b14bc4e18a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:41 -0400 Subject: [PATCH 18/23] SUNRPC: rsi_parse() should use the current user namespace rsi_parse() is part of a downcall, so we must assume that the uids and gids are encoded using the current user namespace. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 0c5d7896d6dd..8be2f209982b 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -474,12 +474,12 @@ static int rsc_parse(struct cache_detail *cd, * treatment so are checked for validity here.) */ /* uid */ - rsci.cred.cr_uid = make_kuid(&init_user_ns, id); + rsci.cred.cr_uid = make_kuid(current_user_ns(), id); /* gid */ if (get_int(&mesg, &id)) goto out; - rsci.cred.cr_gid = make_kgid(&init_user_ns, id); + rsci.cred.cr_gid = make_kgid(current_user_ns(), id); /* number of additional gid's */ if (get_int(&mesg, &N)) @@ -497,7 +497,7 @@ static int rsc_parse(struct cache_detail *cd, kgid_t kgid; if (get_int(&mesg, &id)) goto out; - kgid = make_kgid(&init_user_ns, id); + kgid = make_kgid(current_user_ns(), id); if (!gid_valid(kgid)) goto out; rsci.cred.cr_group_info->gid[i] = kgid; From e45d1a1835b889676374fb74ff4d0e6b06ad173f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 9 Apr 2019 12:13:42 -0400 Subject: [PATCH 19/23] nfsd: knfsd must use the container user namespace Convert knfsd to use the user namespace of the container that started the server processes. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 18 ++++++++++-------- fs/nfsd/nfs3xdr.c | 21 +++++++++++---------- fs/nfsd/nfs4idmap.c | 8 ++++---- fs/nfsd/nfs4xdr.c | 5 +++-- fs/nfsd/nfsd.h | 7 +++++++ fs/nfsd/nfsxdr.c | 17 +++++++++-------- 6 files changed, 44 insertions(+), 32 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 802993d8912f..baa01956a5b3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -570,13 +570,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); + exp.ex_anon_uid= make_kuid(current_user_ns(), an_int); /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); + exp.ex_anon_gid= make_kgid(current_user_ns(), an_int); /* fsid */ err = get_int(&mesg, &an_int); @@ -1170,15 +1170,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp) static void exp_flags(struct seq_file *m, int flag, int fsid, kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc) { + struct user_namespace *userns = m->file->f_cred->user_ns; + show_expflags(m, flag, NFSEXP_ALLFLAGS); if (flag & NFSEXP_FSID) seq_printf(m, ",fsid=%d", fsid); - if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) && - !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2))) - seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu)); - if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) && - !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2))) - seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong)); + if (!uid_eq(anonu, make_kuid(userns, (uid_t)-2)) && + !uid_eq(anonu, make_kuid(userns, 0x10000-2))) + seq_printf(m, ",anonuid=%u", from_kuid_munged(userns, anonu)); + if (!gid_eq(anong, make_kgid(userns, (gid_t)-2)) && + !gid_eq(anong, make_kgid(userns, 0x10000-2))) + seq_printf(m, ",anongid=%u", from_kgid_munged(userns, anong)); if (fsloc && fsloc->locations_count > 0) { char *loctype = (fsloc->migrated) ? "refer" : "replicas"; int i; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 8d789124ed3c..fcf31822c74c 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -96,7 +96,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_sattr3(__be32 *p, struct iattr *iap) +decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns) { u32 tmp; @@ -107,12 +107,12 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_mode = ntohl(*p++); } if (*p++) { - iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++)); + iap->ia_uid = make_kuid(userns, ntohl(*p++)); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } if (*p++) { - iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++)); + iap->ia_gid = make_kgid(userns, ntohl(*p++)); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } @@ -165,12 +165,13 @@ static __be32 * encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { + struct user_namespace *userns = nfsd_user_namespace(rqstp); struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { @@ -325,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_fh(p, &args->fh); if (!p) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if ((args->check_guard = ntohl(*p++)) != 0) { struct timespec time; @@ -455,7 +456,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) switch (args->createmode = ntohl(*p++)) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); break; case NFS3_CREATE_EXCLUSIVE: args->verf = p; @@ -476,7 +477,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) if (!(p = decode_fh(p, &args->fh)) || !(p = decode_filename(p, &args->name, &args->len))) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -491,7 +492,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) if (!(p = decode_fh(p, &args->ffh)) || !(p = decode_filename(p, &args->fname, &args->flen))) return 0; - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); args->tlen = ntohl(*p++); @@ -519,7 +520,7 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) if (args->ftype == NF3BLK || args->ftype == NF3CHR || args->ftype == NF3SOCK || args->ftype == NF3FIFO) - p = decode_sattr3(p, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if (args->ftype == NF3BLK || args->ftype == NF3CHR) { args->major = ntohl(*p++); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index bf137fec33ff..2961016097ac 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -634,7 +634,7 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, return nfserr_inval; status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); - *uid = make_kuid(&init_user_ns, id); + *uid = make_kuid(nfsd_user_namespace(rqstp), id); if (!uid_valid(*uid)) status = nfserr_badowner; return status; @@ -651,7 +651,7 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, return nfserr_inval; status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); - *gid = make_kgid(&init_user_ns, id); + *gid = make_kgid(nfsd_user_namespace(rqstp), id); if (!gid_valid(*gid)) status = nfserr_badowner; return status; @@ -660,13 +660,13 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, __be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, kuid_t uid) { - u32 id = from_kuid(&init_user_ns, uid); + u32 id = from_kuid_munged(nfsd_user_namespace(rqstp), uid); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } __be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, kgid_t gid) { - u32 id = from_kgid(&init_user_ns, gid); + u32 id = from_kgid_munged(nfsd_user_namespace(rqstp), gid); return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a3a3455826aa..52c4f6daa649 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -521,6 +521,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { DECODE_HEAD; + struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); u32 dummy, uid, gid; char *machine_name; int i; @@ -563,8 +564,8 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); if (cbs->flavor == (u32)(-1)) { - kuid_t kuid = make_kuid(&init_user_ns, uid); - kgid_t kgid = make_kgid(&init_user_ns, gid); + kuid_t kuid = make_kuid(userns, uid); + kgid_t kgid = make_kgid(userns, gid); if (uid_valid(kuid) && gid_valid(kgid)) { cbs->uid = kuid; cbs->gid = kgid; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index d200c8680259..24187b5dd638 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,12 @@ static inline int nfsd_v4client(struct svc_rqst *rq) { return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; } +static inline struct user_namespace * +nfsd_user_namespace(const struct svc_rqst *rqstp) +{ + const struct cred *cred = rqstp->rq_xprt->xpt_cred; + return cred ? cred->user_ns : &init_user_ns; +} /* * NFSv4 State diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6b2e8b73d36e..b51fe515f06f 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -71,7 +71,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp) } static __be32 * -decode_sattr(__be32 *p, struct iattr *iap) +decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns) { u32 tmp, tmp1; @@ -86,12 +86,12 @@ decode_sattr(__be32 *p, struct iattr *iap) iap->ia_mode = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_uid = make_kuid(&init_user_ns, tmp); + iap->ia_uid = make_kuid(userns, tmp); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_gid = make_kgid(&init_user_ns, tmp); + iap->ia_gid = make_kgid(userns, tmp); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } @@ -129,6 +129,7 @@ static __be32 * encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { + struct user_namespace *userns = nfsd_user_namespace(rqstp); struct dentry *dentry = fhp->fh_dentry; int type; struct timespec64 time; @@ -139,8 +140,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); @@ -216,7 +217,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_fh(p, &args->fh); if (!p) return 0; - p = decode_sattr(p, &args->attrs); + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -319,7 +320,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) if ( !(p = decode_fh(p, &args->fh)) || !(p = decode_filename(p, &args->name, &args->len))) return 0; - p = decode_sattr(p, &args->attrs); + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return xdr_argsize_check(rqstp, p); } @@ -398,7 +399,7 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) return 0; p += xdrlen; } - decode_sattr(p, &args->attrs); + decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); return 1; } From 0b8f62625dc309651d0efcb6a6247c933acd8b45 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 12 Apr 2019 16:37:30 -0400 Subject: [PATCH 20/23] nfsd: allow fh_want_write to be called twice A fuzzer recently triggered lockdep warnings about potential sb_writers deadlocks caused by fh_want_write(). Looks like we aren't careful to pair each fh_want_write() with an fh_drop_write(). It's not normally a problem since fh_put() will call fh_drop_write() for us. And was OK for NFSv3 where we'd do one operation that might call fh_want_write(), and then put the filehandle. But an NFSv4 protocol fuzzer can do weird things like call unlink twice in a compound, and then we get into trouble. I'm a little worried about this approach of just leaving everything to fh_put(). But I think there are probably a lot of fh_want_write()/fh_drop_write() imbalances so for now I think we need it to be more forgiving. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a7e107309f76..db351247892d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -120,8 +120,11 @@ void nfsd_put_raparams(struct file *file, struct raparms *ra); static inline int fh_want_write(struct svc_fh *fh) { - int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + int ret; + if (fh->fh_want_write) + return 0; + ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) fh->fh_want_write = true; return ret; From 0ca0c9d7edcf4996da3ebf07947c4484d1e3de16 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 12 Apr 2019 16:26:30 -0400 Subject: [PATCH 21/23] nfsd: fh_drop_write in nfsd_unlink fh_want_write() can now be called twice, but I'm also fixing up the callers not to do that. Other cases include setattr and create. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7dc98e14655d..fc24ee47eab5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1786,12 +1786,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_nfserr; + goto out_drop_write; if (d_really_is_negative(rdentry)) { dput(rdentry); - err = nfserr_noent; - goto out; + host_err = -ENOENT; + goto out_drop_write; } if (!type) @@ -1805,6 +1805,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, host_err = commit_metadata(fhp); dput(rdentry); +out_drop_write: + fh_drop_write(fhp); out_nfserr: err = nfserrno(host_err); out: From 5926459e7c897ee348c134f44da520c8bf234b05 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 24 Apr 2019 12:00:08 +1000 Subject: [PATCH 22/23] locks: move checks from locks_free_lock() to locks_release_private() Code that allocates locks using locks_alloc_lock() will free it using locks_free_lock(), and will benefit from the BUG_ON() consistency checks therein. However some code (nfsd and lockd) allocate a lock embedded in some other data structure, and so free the lock themselves after calling locks_release_private(). This path does not benefit from the consistency checks. To help catch future errors, move the BUG_ON() checks to locks_release_private() - which locks_free_lock() already calls. This ensures that all users for locks will find out if the lock isn't detached properly before being free. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/locks.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 71d0c6c2aac5..456a3782c6ca 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -352,6 +352,12 @@ EXPORT_SYMBOL_GPL(locks_alloc_lock); void locks_release_private(struct file_lock *fl) { + BUG_ON(waitqueue_active(&fl->fl_wait)); + BUG_ON(!list_empty(&fl->fl_list)); + BUG_ON(!list_empty(&fl->fl_blocked_requests)); + BUG_ON(!list_empty(&fl->fl_blocked_member)); + BUG_ON(!hlist_unhashed(&fl->fl_link)); + if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) fl->fl_ops->fl_release_private(fl); @@ -371,12 +377,6 @@ EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { - BUG_ON(waitqueue_active(&fl->fl_wait)); - BUG_ON(!list_empty(&fl->fl_list)); - BUG_ON(!list_empty(&fl->fl_blocked_requests)); - BUG_ON(!list_empty(&fl->fl_blocked_member)); - BUG_ON(!hlist_unhashed(&fl->fl_link)); - locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } From 1c73b9d24f804935dbb06527b768f8f068c93472 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 2 May 2019 13:32:12 -0400 Subject: [PATCH 23/23] nfsd: update callback done processing Instead of having the convention where individual nfsd4_callback_ops->done operations return -1 to indicate the callback path is down, move the check to nfsd4_cb_done. Only mark the callback path down on transport-level errors, not NFS-level errors. The existing logic causes the server to set SEQ4_STATUS_CB_PATH_DOWN just because the client returned an error to a CB_RECALL for a delegation that the client had already done a FREE_STATEID for. But clearly that error doesn't mean that there's anything wrong with the backchannel. Additionally, handle NFS4ERR_DELAY in nfsd4_cb_recall_done. The client returns NFS4ERR_DELAY if it is already in the process of returning the delegation. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 9 +++++---- fs/nfsd/nfs4layouts.c | 2 +- fs/nfsd/nfs4state.c | 5 ++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7caa3801ce72..b9cbd7189d74 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1122,10 +1122,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; case 1: - break; - case -1: - /* Network partition? */ - nfsd4_mark_cb_down(clp, task->tk_status); + switch (task->tk_status) { + case -EIO: + case -ETIMEDOUT: + nfsd4_mark_cb_down(clp, task->tk_status); + } break; default: BUG(); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 44517fb5c0de..a79e24b79095 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -693,7 +693,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ops->fence_client(ls); else nfsd4_cb_layout_fail(ls); - return -1; + return 1; case -NFS4ERR_NOMATCHING_LAYOUT: trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid); task->tk_status = 0; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8078314981f5..dfe4b596c2e5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3970,6 +3970,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, switch (task->tk_status) { case 0: return 1; + case -NFS4ERR_DELAY: + rpc_delay(task, 2 * HZ); + return 0; case -EBADHANDLE: case -NFS4ERR_BAD_STATEID: /* @@ -3982,7 +3985,7 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, } /*FALLTHRU*/ default: - return -1; + return 1; } }