diff --git a/net/rds/connection.c b/net/rds/connection.c index 49adeef8090c..d4564036a339 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_transport *loop_trans; unsigned long flags; int ret; - struct rds_transport *otrans = trans; - if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) - goto new_conn; rcu_read_lock(); conn = rds_conn_lookup(net, head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (conn) goto out; -new_conn: conn = kmem_cache_zalloc(rds_conn_slab, gfp); if (!conn) { conn = ERR_PTR(-ENOMEM); @@ -207,6 +203,7 @@ new_conn: atomic_set(&conn->c_state, RDS_CONN_DOWN); conn->c_send_gen = 0; + conn->c_outgoing = (is_outgoing ? 1 : 0); conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); @@ -243,22 +240,13 @@ new_conn: /* Creating normal conn */ struct rds_connection *found; - if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) - found = NULL; - else - found = rds_conn_lookup(net, head, laddr, faddr, trans); + found = rds_conn_lookup(net, head, laddr, faddr, trans); if (found) { trans->conn_free(conn->c_transport_data); kmem_cache_free(rds_conn_slab, conn); conn = found; } else { - if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) || - (otrans->t_type != RDS_TRANS_TCP)) { - /* Only the active side should be added to - * reconnect list for TCP. - */ - hlist_add_head_rcu(&conn->c_hash_node, head); - } + hlist_add_head_rcu(&conn->c_hash_node, head); rds_cong_add_conn(conn); rds_conn_count++; } @@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); - rds_queue_reconnect(conn); + if (conn->c_trans->t_type != RDS_TRANS_TCP || + conn->c_outgoing == 1) + rds_queue_reconnect(conn); } else { rcu_read_unlock(); } diff --git a/net/rds/rds.h b/net/rds/rds.h index afb4048d0cfd..b4c7ac021d5b 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -86,7 +86,9 @@ struct rds_connection { struct hlist_node c_hash_node; __be32 c_laddr; __be32 c_faddr; - unsigned int c_loopback:1; + unsigned int c_loopback:1, + c_outgoing:1, + c_pad_to_32:30; struct rds_connection *c_passive; struct rds_cong_map *c_lcong; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c42b60bf4c68..9d6ddbacd875 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock) set_fs(oldfs); } +/* All module specific customizations to the RDS-TCP socket should be done in + * rds_tcp_tune() and applied after socket creation. In general these + * customizations should be tunable via module_param() + */ void rds_tcp_tune(struct socket *sock) { - struct sock *sk = sock->sk; - rds_tcp_nonagle(sock); - - /* - * We're trying to saturate gigabit with the default, - * see svc_sock_setbufsize(). - */ - lock_sock(sk); - sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; - release_sock(sk); } u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 444d78d0bd77..1d90240e5d82 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -110,28 +110,24 @@ int rds_tcp_accept_one(struct socket *sock) goto out; } /* An incoming SYN request came in, and TCP just accepted it. - * We always create a new conn for listen side of TCP, and do not - * add it to the c_hash_list. * * If the client reboots, this conn will need to be cleaned up. * rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; - WARN_ON(!rs_tcp || rs_tcp->t_sock); + if (rs_tcp->t_sock && + ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + struct sock *nsk = new_sock->sk; - /* - * see the comment above rds_queue_delayed_reconnect() - */ - if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { - if (rds_conn_state(conn) == RDS_CONN_UP) - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - else - rds_tcp_stats_inc(s_tcp_connect_raced); - rds_conn_drop(conn); + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); + new_sock = NULL; ret = 0; goto out; } + rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); new_sock = NULL; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 53b17ca0dff5..2894e6095e3b 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; int ret = 0; + int more; if (hdr_off == 0) { /* @@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, goto out; } + more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0; while (sg < rm->data.op_nents) { + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; + ret = tc->t_sock->ops->sendpage(tc->t_sock, sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, - MSG_DONTWAIT|MSG_NOSIGNAL); + flags); rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, ret); @@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, off = 0; sg++; } + if (sg == rm->data.op_nents - 1) + more = 0; } out: