1
0
Fork 0

xprtrdma: Connection becomes unstable after a reconnect

commit a31b2f9392 upstream.

This is because xprt_request_get_cong() is allowing more than one
RPC Call to be transmitted before the first Receive on the new
connection. The first Receive fills the Receive Queue based on the
server's credit grant. Before that Receive, there is only a single
Receive WR posted because the client doesn't know the server's
credit grant.

The solution is to clear rq_cong on all outstanding rpc_rqsts when
the cwnd is reset. This is because an RPC/RDMA credit is good for
one connection instance only.

Fixes: 75891f502f ("SUNRPC: Support for congestion control ... ")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
5.4-rM2-2.2.x-imx-squashed
Chuck Lever 2019-10-09 13:07:27 -04:00 committed by Greg Kroah-Hartman
parent ee978cecd8
commit b2b36f91af
2 changed files with 25 additions and 0 deletions

View File

@ -428,8 +428,11 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
/* Prepare @xprt for the next connection by reinitializing
* its credit grant to one (see RFC 8166, Section 3.3.3).
*/
spin_lock(&xprt->transport_lock);
r_xprt->rx_buf.rb_credits = 1;
xprt->cong = 0;
xprt->cwnd = RPC_CWNDSHIFT;
spin_unlock(&xprt->transport_lock);
out:
xprt->reestablish_timeout = 0;

View File

@ -75,6 +75,7 @@
* internal functions
*/
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
/* Clears rq_cong on every req on the transport's rb_allreqs list. */
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
@ -780,6 +781,7 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
trace_xprtrdma_disconnect(r_xprt, rc);
rpcrdma_xprt_drain(r_xprt);
rpcrdma_reqs_reset(r_xprt);
}
/* Fixed-size circular FIFO queue. This implementation is wait-free and
@ -1042,6 +1044,26 @@ out1:
return NULL;
}
/**
* rpcrdma_reqs_reset - Reset all reqs owned by a transport
* @r_xprt: controlling transport instance
*
* ASSUMPTION: the rb_allreqs list is stable for the duration,
* and thus can be walked without holding rb_lock. Eg. the
* caller is holding the transport send lock to exclude
* device removal or disconnection.
*/
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_req *req;
list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
/* Credits are valid only for one connection */
req->rl_slot.rq_cong = 0;
}
}
static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{