From 0cc11a61b80a1ab1d12f1597b27b8b45ef8bac4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Oct 2016 09:34:31 -0400 Subject: [PATCH 1/5] nfsd: move blocked lock handling under a dedicated spinlock Bruce was hitting some lockdep warnings in testing, showing that we could hit a deadlock with the new CB_NOTIFY_LOCK handling, involving a rather complex situation involving four different spinlocks. The crux of the matter is that we end up taking the nn->client_lock in the lm_notify handler. The simplest fix is to just declare a new per-nfsd_net spinlock to protect the new CB_NOTIFY_LOCK structures. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 5 +++++ fs/nfsd/nfs4state.c | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index b10d557f9c9e..ee36efd5aece 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -84,6 +84,8 @@ struct nfsd_net { struct list_head client_lru; struct list_head close_lru; struct list_head del_recall_lru; + + /* protected by blocked_locks_lock */ struct list_head blocked_locks_lru; struct delayed_work laundromat_work; @@ -91,6 +93,9 @@ struct nfsd_net { /* client_lock protects the client lru list and session hash table */ spinlock_t client_lock; + /* protects blocked_locks_lru */ + spinlock_t blocked_locks_lock; + struct file *rec_file; bool in_grace; const struct nfsd4_client_tracking_ops *client_tracking_ops; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9752beb78659..95474196a17e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -217,7 +217,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, { struct nfsd4_blocked_lock *cur, *found = NULL; - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); @@ -226,7 +226,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, break; } } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); if (found) posix_unblock_lock(&found->nbl_lock); return found; @@ -4665,7 +4665,7 @@ nfs4_laundromat(struct nfsd_net *nn) * indefinitely once the lock does become free. */ BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); @@ -4678,7 +4678,7 @@ nfs4_laundromat(struct nfsd_net *nn) list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&nn->blocked_locks_lru, @@ -5439,13 +5439,13 @@ nfsd4_lm_notify(struct file_lock *fl) bool queue = false; /* An empty list means that something else is going to be using it */ - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); queue = true; } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); if (queue) nfsd4_run_cb(&nbl->nbl_cb); @@ -5868,10 +5868,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (fl_flags & FL_SLEEP) { nbl->nbl_time = jiffies; - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); } err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); @@ -5900,10 +5900,10 @@ out: if (nbl) { /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); } @@ -6943,9 +6943,11 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); - INIT_LIST_HEAD(&nn->blocked_locks_lru); spin_lock_init(&nn->client_lock); + spin_lock_init(&nn->blocked_locks_lock); + INIT_LIST_HEAD(&nn->blocked_locks_lru); + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); get_net(net); @@ -7063,14 +7065,14 @@ nfs4_state_shutdown_net(struct net *net) } BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&nn->blocked_locks_lru, From 2876a34466ce382a76b9ffb34757bb48928ac743 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Oct 2016 16:30:09 -0400 Subject: [PATCH 2/5] sunrpc: don't pass on-stack memory to sg_set_buf As of ac4e97abce9b "scatterlist: sg_set_buf() argument must be in linear mapping", sg_set_buf hits a BUG when make_checksum_v2->xdr_process_buf, among other callers, passes it memory on the stack. We only need a scatterlist to pass this to the crypto code, and it seems like overkill to require kmalloc'd memory just to encrypt a few bytes, but for now this seems the best fix. Many of these callers are in the NFS write paths, so we allocate with GFP_NOFS. It might be possible to do without allocations here entirely, but that would probably be a bigger project. Cc: Rusty Russell Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/auth_gss.c | 13 +++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 84 ++++++++++++++++----------- net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++--- 3 files changed, 72 insertions(+), 46 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d8bd97a5a7c9..3dfd769dc5b5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1616,7 +1616,7 @@ gss_validate(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 seq; + __be32 *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -1631,9 +1631,12 @@ gss_validate(struct rpc_task *task, __be32 *p) goto out_bad; if (flav != RPC_AUTH_GSS) goto out_bad; - seq = htonl(task->tk_rqstp->rq_seqno); - iov.iov_base = &seq; - iov.iov_len = sizeof(seq); + seq = kmalloc(4, GFP_NOFS); + if (!seq) + goto out_bad; + *seq = htonl(task->tk_rqstp->rq_seqno); + iov.iov_base = seq; + iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; @@ -1653,11 +1656,13 @@ gss_validate(struct rpc_task *task, __be32 *p) gss_put_ctx(ctx); dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n", task->tk_pid, __func__); + kfree(seq); return p + XDR_QUADLEN(len); out_bad: gss_put_ctx(ctx); dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__, PTR_ERR(ret)); + kfree(seq); return ret; } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 244245bcbbd2..90115ceefd49 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -166,8 +166,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, unsigned int usage, struct xdr_netobj *cksumout) { struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; u8 rc4salt[4]; struct crypto_ahash *md5; struct crypto_ahash *hmac_md5; @@ -187,23 +187,22 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (!checksumdata) + return GSS_S_FAILURE; + md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(md5)) - return GSS_S_FAILURE; + goto out_free_cksum; hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hmac_md5)) { - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (IS_ERR(hmac_md5)) + goto out_free_md5; req = ahash_request_alloc(md5, GFP_KERNEL); - if (!req) { - crypto_free_ahash(hmac_md5); - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_hmac_md5; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -232,11 +231,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, ahash_request_free(req); req = ahash_request_alloc(hmac_md5, GFP_KERNEL); - if (!req) { - crypto_free_ahash(hmac_md5); - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_hmac_md5; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -258,8 +254,12 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, cksumout->len = kctx->gk5e->cksumlength; out: ahash_request_free(req); - crypto_free_ahash(md5); +out_free_hmac_md5: crypto_free_ahash(hmac_md5); +out_free_md5: + crypto_free_ahash(md5); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -276,8 +276,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct crypto_ahash *tfm; struct ahash_request *req; struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; unsigned int checksumlen; if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR) @@ -291,15 +291,17 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (checksumdata == NULL) return GSS_S_FAILURE; + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto out_free_cksum; + req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) { - crypto_free_ahash(tfm); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_ahash; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -349,7 +351,10 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, cksumout->len = kctx->gk5e->cksumlength; out: ahash_request_free(req); +out_free_ahash: crypto_free_ahash(tfm); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -368,8 +373,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, struct crypto_ahash *tfm; struct ahash_request *req; struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; unsigned int checksumlen; if (kctx->gk5e->keyed_cksum == 0) { @@ -383,16 +388,18 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (!checksumdata) + return GSS_S_FAILURE; + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) - return GSS_S_FAILURE; + goto out_free_cksum; checksumlen = crypto_ahash_digestsize(tfm); req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) { - crypto_free_ahash(tfm); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_ahash; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -433,7 +440,10 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, } out: ahash_request_free(req); +out_free_ahash: crypto_free_ahash(tfm); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -666,14 +676,17 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, u32 ret; struct scatterlist sg[1]; SKCIPHER_REQUEST_ON_STACK(req, cipher); - u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; + u8 *data; struct page **save_pages; u32 len = buf->len - offset; - if (len > ARRAY_SIZE(data)) { + if (len > GSS_KRB5_MAX_BLOCKSIZE * 2) { WARN_ON(0); return -ENOMEM; } + data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS); + if (!data) + return -ENOMEM; /* * For encryption, we want to read from the cleartext @@ -708,6 +721,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, ret = write_bytes_to_xdr_buf(buf, offset, data, len); out: + kfree(data); return ret; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d67f7e1bc82d..45662d7f0943 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -718,30 +718,37 @@ gss_write_null_verf(struct svc_rqst *rqstp) static int gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { - __be32 xdr_seq; + __be32 *xdr_seq; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj mic; __be32 *p; struct kvec iov; + int err = -1; svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); - xdr_seq = htonl(seq); + xdr_seq = kmalloc(4, GFP_KERNEL); + if (!xdr_seq) + return -1; + *xdr_seq = htonl(seq); - iov.iov_base = &xdr_seq; - iov.iov_len = sizeof(xdr_seq); + iov.iov_base = xdr_seq; + iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_data); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); if (maj_stat != GSS_S_COMPLETE) - return -1; + goto out; *p++ = htonl(mic.len); memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); p += XDR_QUADLEN(mic.len); if (!xdr_ressize_check(rqstp, p)) - return -1; - return 0; + goto out; + err = 0; +out: + kfree(xdr_seq); + return err; } struct gss_domain { From 18e601d6adae5042f82d105ccd3d4498050f2ebf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Oct 2016 20:33:23 -0400 Subject: [PATCH 3/5] sunrpc: fix some missing rq_rbuffer assignments We've been seeing some crashes in testing that look like this: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] memcpy_orig+0x29/0x110 PGD 212ca2067 PUD 212ca3067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ppdev parport_pc i2c_piix4 sg parport i2c_core virtio_balloon pcspkr acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod ata_generic pata_acpi virtio_scsi 8139too ata_piix libata 8139cp mii virtio_pci floppy virtio_ring serio_raw virtio CPU: 1 PID: 1540 Comm: nfsd Not tainted 4.9.0-rc1 #39 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 task: ffff88020d7ed200 task.stack: ffff880211838000 RIP: 0010:[] [] memcpy_orig+0x29/0x110 RSP: 0018:ffff88021183bdd0 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff88020d7fa000 RCX: 000000f400000000 RDX: 0000000000000014 RSI: ffff880212927020 RDI: 0000000000000000 RBP: ffff88021183be30 R08: 01000000ef896996 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880211704ca8 R13: ffff88021473f000 R14: 00000000ef896996 R15: ffff880211704800 FS: 0000000000000000(0000) GS:ffff88021fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000212ca1000 CR4: 00000000000006e0 Stack: ffffffffa01ea087 ffffffff63400001 ffff880215145e00 ffff880211bacd00 ffff88021473f2b8 0000000000000004 00000000d0679d67 ffff880211bacd00 ffff88020d7fa000 ffff88021473f000 0000000000000000 ffff88020d7faa30 Call Trace: [] ? svc_tcp_recvfrom+0x5a7/0x790 [sunrpc] [] svc_recv+0xad8/0xbd0 [sunrpc] [] nfsd+0xde/0x160 [nfsd] [] ? nfsd_destroy+0x60/0x60 [nfsd] [] kthread+0xd8/0xf0 [] ret_from_fork+0x1f/0x40 [] ? kthread_park+0x60/0x60 Code: 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 fe 7c 35 48 83 ea 20 48 83 ea 20 4c 8b 06 4c 8b 4e 08 4c 8b 56 10 4c 8b 5e 18 48 8d 76 20 <4c> 89 07 4c 89 4f 08 4c 89 57 10 4c 89 5f 18 48 8d 7f 20 73 d4 RIP [] memcpy_orig+0x29/0x110 RSP CR2: 0000000000000000 Both Bruce and Eryu ran a bisect here and found that the problematic patch was 68778945e46 (SUNRPC: Separate buffer pointers for RPC Call and Reply messages). That patch changed rpc_xdr_encode to use a new rq_rbuffer pointer to set up the receive buffer, but didn't change all of the necessary codepaths to set it properly. In particular the backchannel setup was missing. We need to set rq_rbuffer whenever rq_buffer is set. Ensure that it is. Reviewed-by: Chuck Lever Tested-by: Chuck Lever Reported-by: Eryu Guan Tested-by: Eryu Guan Fixes: 68778945e46 "SUNRPC: Separate buffer pointers..." Reported-by: J. Bruce Fields Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 + net/sunrpc/xprtsock.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 2d8545c34095..fc4535ead7c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -182,6 +182,7 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -ENOMEM; rqst->rq_buffer = page_address(page); + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0137af1c0916..e01c825bc683 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2563,6 +2563,7 @@ static int bc_malloc(struct rpc_task *task) buf->len = PAGE_SIZE; rqst->rq_buffer = buf->data; + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; } From 8d42629be0cb6c58a5c77cd02e47974c0e724e63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2016 22:22:33 -0400 Subject: [PATCH 4/5] svcrdma: backchannel cannot share a page for send and rcv buffers The underlying transport releases the page pointed to by rq_buffer during xprt_rdma_bc_send_request. When the backchannel reply arrives, rq_rbuffer then points to freed memory. Fixes: 68778945e46f ('SUNRPC: Separate buffer pointers for RPC ...') Signed-off-by: Chuck Lever Cc: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index fc4535ead7c2..20027f8de129 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -177,19 +177,26 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } + /* svc_rdma_sendto releases this page */ page = alloc_page(RPCRDMA_DEF_GFP); if (!page) return -ENOMEM; - rqst->rq_buffer = page_address(page); - rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; + + rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); + if (!rqst->rq_rbuffer) { + put_page(page); + return -ENOMEM; + } return 0; } static void xprt_rdma_bc_free(struct rpc_task *task) { - /* No-op: ctxt and page have already been freed. */ + struct rpc_rqst *rqst = task->tk_rqstp; + + kfree(rqst->rq_rbuffer); } static int From f46c445b79906a9da55c13e0a6f6b6a006b892fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 29 Oct 2016 18:19:03 -0400 Subject: [PATCH 5/5] nfsd: Fix general protection fault in release_lock_stateid() When I push NFSv4.1 / RDMA hard, (xfstests generic/089, for example), I get this crash on the server: Oct 28 22:04:30 klimt kernel: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC Oct 28 22:04:30 klimt kernel: Modules linked in: cts rpcsec_gss_krb5 iTCO_wdt iTCO_vendor_support sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm btrfs irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd xor pcspkr raid6_pq i2c_i801 i2c_smbus lpc_ich mfd_core sg mei_me mei ioatdma shpchp wmi ipmi_si ipmi_msghandler rpcrdma ib_ipoib rdma_ucm acpi_power_meter acpi_pad ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb ahci libahci ptp mlx4_core pps_core dca libata i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod Oct 28 22:04:30 klimt kernel: CPU: 7 PID: 1558 Comm: nfsd Not tainted 4.9.0-rc2-00005-g82cd754 #8 Oct 28 22:04:30 klimt kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 Oct 28 22:04:30 klimt kernel: task: ffff880835c3a100 task.stack: ffff8808420d8000 Oct 28 22:04:30 klimt kernel: RIP: 0010:[] [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP: 0018:ffff8808420dbce0 EFLAGS: 00010246 Oct 28 22:04:30 klimt kernel: RAX: ffff88084e6660f0 RBX: ffff88084e667020 RCX: 0000000000000000 Oct 28 22:04:30 klimt kernel: RDX: 0000000000000007 RSI: 0000000000000000 RDI: ffff88084e667020 Oct 28 22:04:30 klimt kernel: RBP: ffff8808420dbcf8 R08: 0000000000000001 R09: 0000000000000000 Oct 28 22:04:30 klimt kernel: R10: ffff880835c3a100 R11: ffff880835c3aca8 R12: 6b6b6b6b6b6b6b6b Oct 28 22:04:30 klimt kernel: R13: ffff88084e6670d8 R14: ffff880835f546f0 R15: ffff880835f1c548 Oct 28 22:04:30 klimt kernel: FS: 0000000000000000(0000) GS:ffff88087bdc0000(0000) knlGS:0000000000000000 Oct 28 22:04:30 klimt kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Oct 28 22:04:30 klimt kernel: CR2: 00007ff020389000 CR3: 0000000001c06000 CR4: 00000000001406e0 Oct 28 22:04:30 klimt kernel: Stack: Oct 28 22:04:30 klimt kernel: ffff88084e667020 0000000000000000 ffff88084e6670d8 ffff8808420dbd20 Oct 28 22:04:30 klimt kernel: ffffffffa05ac80d ffff880835f54548 ffff88084e640008 ffff880835f545b0 Oct 28 22:04:30 klimt kernel: ffff8808420dbd70 ffffffffa059803d ffff880835f1c768 0000000000000870 Oct 28 22:04:30 klimt kernel: Call Trace: Oct 28 22:04:30 klimt kernel: [] nfsd4_free_stateid+0xfd/0x1b0 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd4_proc_compound+0x40d/0x690 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd_dispatch+0xd4/0x1d0 [nfsd] Oct 28 22:04:30 klimt kernel: [] svc_process_common+0x3d9/0x700 [sunrpc] Oct 28 22:04:30 klimt kernel: [] svc_process+0xf4/0x330 [sunrpc] Oct 28 22:04:30 klimt kernel: [] nfsd+0xfa/0x160 [nfsd] Oct 28 22:04:30 klimt kernel: [] ? nfsd_destroy+0x170/0x170 [nfsd] Oct 28 22:04:30 klimt kernel: [] kthread+0x10b/0x120 Oct 28 22:04:30 klimt kernel: [] ? kthread_stop+0x280/0x280 Oct 28 22:04:30 klimt kernel: [] ret_from_fork+0x2a/0x40 Oct 28 22:04:30 klimt kernel: Code: c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 48 8b 87 b0 00 00 00 48 89 fb 4c 8b a0 98 00 00 00 <49> 8b 44 24 20 48 8d b8 80 03 00 00 e8 10 66 1a e1 48 89 df e8 Oct 28 22:04:30 klimt kernel: RIP [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP Oct 28 22:04:30 klimt kernel: ---[ end trace cf5d0b371973e167 ]--- Jeff Layton says: > Hm...now that I look though, this is a little suspicious: > > struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); > > I wonder if it's possible for the openstateid to have already been > destroyed at this point. > > We might be better off doing something like this to get the client pointer: > > stp->st_stid.sc_client; > > ...which should be more direct and less dependent on other stateids > staying valid. With the suggested change, I am no longer able to reproduce the above oops. v2: Fix unhash_lock_stateid() as well Fix-suggested-by: Jeff Layton Fixes: 42691398be08 ('nfsd: Fix race between FREE_STATEID and LOCK') Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 95474196a17e..4b4beaaa4eaa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1227,9 +1227,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); - - lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); list_del_init(&stp->st_locks); nfs4_unhash_stid(&stp->st_stid); @@ -1238,12 +1236,12 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) static void release_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; bool unhashed; - spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&clp->cl_lock); unhashed = unhash_lock_stateid(stp); - spin_unlock(&oo->oo_owner.so_client->cl_lock); + spin_unlock(&clp->cl_lock); if (unhashed) nfs4_put_stid(&stp->st_stid); }