From 8961749e5f498d91ded20dc797bb77aa366bca2e Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Fri, 7 Feb 2014 18:58:43 +0000 Subject: [PATCH 1/8] af_rxrpc: Remove incorrect checksum calculation from rxrpc_recvmsg() The UDP checksum was already verified in rxrpc_data_ready() - which calls skb_checksum_complete() - as the RxRPC packet header contains no checksum of its own. Subsequent calls to skb_copy_and_csum_datagram_iovec() are thus redundant and are, in any case, being passed only a subset of the UDP payload - so the checksum will always fail if that path is taken. So there is no need to check skb->ip_summed in rxrpc_recvmsg(), and no need for the csum_copy_error: exit path. Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-recvmsg.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 34b5490dde65..e9aaa65c0778 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (copy > len - copied) copy = len - copied; - if (skb->ip_summed == CHECKSUM_UNNECESSARY || - skb->ip_summed == CHECKSUM_PARTIAL) { - ret = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, copy); - } else { - ret = skb_copy_and_csum_datagram_iovec(skb, offset, - msg->msg_iov); - if (ret == -EINVAL) - goto csum_copy_error; - } + ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy); if (ret < 0) goto copy_error; @@ -348,20 +339,6 @@ copy_error: _leave(" = %d", ret); return ret; -csum_copy_error: - _debug("csum error"); - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call); - rxrpc_kill_skb(skb); - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - } - skb_kill_datagram(&rx->sk, skb, flags); - rxrpc_put_call(call); - return -EAGAIN; - wait_interrupted: ret = sock_intr_errno(timeo); wait_error: From b6f3a40cb70fa53a5160b8f061ff219b00992626 Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Fri, 7 Feb 2014 18:58:43 +0000 Subject: [PATCH 2/8] af_rxrpc: Prevent RxRPC peers from ABORT-storming one another When an ABORT is sent, aborting a connection, the sender quite reasonably forgets about the connection. If another frame is received, another ABORT will be sent. When the receiver gets it, it no longer applies to an extant connection, so an ABORT is sent, and so on... Prevent this by never sending a rejection for an ABORT packet. Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-input.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 529572f18d1f..eb7e16276cc1 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -606,8 +606,10 @@ dead_call: } _debug("dead call"); - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->trans->local, skb); + if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { + skb->priority = RX_CALL_DEAD; + rxrpc_reject_packet(conn->trans->local, skb); + } goto done; /* resend last packet of a completed call @@ -790,8 +792,10 @@ cant_route_call: skb->priority = RX_CALL_DEAD; } - _debug("reject"); - rxrpc_reject_packet(local, skb); + if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { + _debug("reject type %d",sp->hdr.type); + rxrpc_reject_packet(local, skb); + } rxrpc_put_local(local); _leave(" [no call]"); return; From 6c9a2d3202973a0266beabc5274c3e67dad5db96 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Feb 2014 12:02:41 +0000 Subject: [PATCH 3/8] af_rxrpc: Fix UDP MTU calculation from ICMP_FRAG_NEEDED AF_RXRPC sends UDP packets with the "Don't Fragment" bit set in an attempt to determine the maximum packet size between the local socket and the peer by invoking the generation of ICMP_FRAG_NEEDED packets. Once a packet is sent with the "Don't Fragment" bit set, it is then inconvenient to break it up as that requires recalculating all the rxrpc serial and sequence numbers and reencrypting all the fragments, so we switch off the "Don't Fragment" service temporarily and send the bounced packet again. Future packets then use the new MTU. That's all fine. The problem lies in rxrpc_UDP_error_report() where the code that deals with ICMP_FRAG_NEEDED packets lives. Packets of this type have a field (ee_info) to indicate the maximum packet size at the reporting node - but sometimes ee_info isn't filled in and is just left as 0 and the code must allow for this. When ee_info is 0, the code should take the MTU size we're currently using and reduce it for the next packet we want to send. However, it takes ee_info (which is known to be 0) and tries to reduce that instead. This was discovered by Coverity. Reported-by: Dave Jones Signed-off-by: David Howells --- net/rxrpc/ar-error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index a9206087b4d7..db57458c824c 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk) if (mtu == 0) { /* they didn't give us a size, estimate one */ + mtu = peer->if_mtu; if (mtu > 1500) { mtu >>= 1; if (mtu < 1500) From 5873c0834f8896aa9da338b941035a2f8b29e99b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Feb 2014 18:58:44 +0000 Subject: [PATCH 4/8] af_rxrpc: Add sysctls for configuring RxRPC parameters Add sysctls for configuring RxRPC protocol handling, specifically controls on delays before ack generation, the delay before resending a packet, the maximum lifetime of a call and the expiration times of calls, connections and transports that haven't been recently used. More info added in Documentation/networking/rxrpc.txt. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 62 ++++++++++++++++ net/rxrpc/Makefile | 5 +- net/rxrpc/af_rxrpc.c | 9 +++ net/rxrpc/ar-ack.c | 35 +++++++-- net/rxrpc/ar-call.c | 18 +++-- net/rxrpc/ar-connection.c | 10 ++- net/rxrpc/ar-input.c | 2 - net/rxrpc/ar-internal.h | 24 +++++- net/rxrpc/ar-output.c | 7 +- net/rxrpc/ar-skbuff.c | 5 ++ net/rxrpc/ar-transport.c | 10 ++- net/rxrpc/sysctl.c | 113 +++++++++++++++++++++++++++++ 12 files changed, 273 insertions(+), 27 deletions(-) create mode 100644 net/rxrpc/sysctl.c diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index b89bc82eed46..aa08d2625f05 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -27,6 +27,8 @@ Contents of this document: (*) AF_RXRPC kernel interface. + (*) Configurable parameters. + ======== OVERVIEW @@ -864,3 +866,63 @@ The kernel interface functions are as follows: This is used to allocate a null RxRPC key that can be used to indicate anonymous security for a particular domain. + + +======================= +CONFIGURABLE PARAMETERS +======================= + +The RxRPC protocol driver has a number of configurable parameters that can be +adjusted through sysctls in /proc/net/rxrpc/: + + (*) req_ack_delay + + The amount of time in milliseconds after receiving a packet with the + request-ack flag set before we honour the flag and actually send the + requested ack. + + Usually the other side won't stop sending packets until the advertised + reception window is full (to a maximum of 255 packets), so delaying the + ACK permits several packets to be ACK'd in one go. + + (*) soft_ack_delay + + The amount of time in milliseconds after receiving a new packet before we + generate a soft-ACK to tell the sender that it doesn't need to resend. + + (*) idle_ack_delay + + The amount of time in milliseconds after all the packets currently in the + received queue have been consumed before we generate a hard-ACK to tell + the sender it can free its buffers, assuming no other reason occurs that + we would send an ACK. + + (*) resend_timeout + + The amount of time in milliseconds after transmitting a packet before we + transmit it again, assuming no ACK is received from the receiver telling + us they got it. + + (*) max_call_lifetime + + The maximum amount of time in seconds that a call may be in progress + before we preemptively kill it. + + (*) dead_call_expiry + + The amount of time in seconds before we remove a dead call from the call + list. Dead calls are kept around for a little while for the purpose of + repeating ACK and ABORT packets. + + (*) connection_expiry + + The amount of time in seconds after a connection was last used before we + remove it from the connection list. Whilst a connection is in existence, + it serves as a placeholder for negotiated security; when it is deleted, + the security must be renegotiated. + + (*) transport_expiry + + The amount of time in seconds after a transport was last used before we + remove it from the transport list. Whilst a transport is in existence, it + serves to anchor the peer data and keeps the connection ID counter. diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index d1c3429b69ed..ec126f91276b 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -20,9 +20,8 @@ af-rxrpc-y := \ ar-skbuff.o \ ar-transport.o -ifeq ($(CONFIG_PROC_FS),y) -af-rxrpc-y += ar-proc.o -endif +af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o +af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index e61aa6001c65..7b1670489638 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void) goto error_key_type_s; } + ret = rxrpc_sysctl_init(); + if (ret < 0) { + printk(KERN_CRIT "RxRPC: Cannot register sysctls\n"); + goto error_sysctls; + } + #ifdef CONFIG_PROC_FS proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops); proc_create("rxrpc_conns", 0, init_net.proc_net, @@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void) #endif return 0; +error_sysctls: + unregister_key_type(&key_type_rxrpc_s); error_key_type_s: unregister_key_type(&key_type_rxrpc); error_key_type: @@ -865,6 +873,7 @@ error_call_jar: static void __exit af_rxrpc_exit(void) { _enter(""); + rxrpc_sysctl_exit(); unregister_key_type(&key_type_rxrpc_s); unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index cd97a0ce48d8..732b82f540c5 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -19,7 +19,29 @@ #include #include "ar-internal.h" -static unsigned int rxrpc_ack_defer = 1; +/* + * How long to wait before scheduling ACK generation after seeing a + * packet with RXRPC_REQUEST_ACK set (in jiffies). + */ +unsigned rxrpc_requested_ack_delay = 1; + +/* + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * + * We use this when we've received new data packets. If those packets aren't + * all consumed within this time we will send a DELAY ACK if an ACK was not + * requested to let the sender know it doesn't need to resend. + */ +unsigned rxrpc_soft_ack_delay = 1 * HZ; + +/* + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * + * We use this when we've consumed some previously soft-ACK'd packets when + * further packets aren't immediately received to decide when to send an IDLE + * ACK let the other end know that it can free up its Tx buffer space. + */ +unsigned rxrpc_idle_ack_delay = 1; static const char *rxrpc_acks(u8 reason) { @@ -82,24 +104,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, switch (ack_reason) { case RXRPC_ACK_DELAY: _debug("run delay timer"); - call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ; - add_timer(&call->ack_timer); - return; + expiry = rxrpc_soft_ack_delay; + goto run_timer; case RXRPC_ACK_IDLE: if (!immediate) { _debug("run defer timer"); - expiry = 1; + expiry = rxrpc_idle_ack_delay; goto run_timer; } goto cancel_timer; case RXRPC_ACK_REQUESTED: - if (!rxrpc_ack_defer) + expiry = rxrpc_requested_ack_delay; + if (!expiry) goto cancel_timer; if (!immediate || serial == cpu_to_be32(1)) { _debug("run defer timer"); - expiry = rxrpc_ack_defer; goto run_timer; } diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index a3bbb360a3f9..1e0903a2a0db 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -16,6 +16,16 @@ #include #include "ar-internal.h" +/* + * Maximum lifetime of a call (in jiffies). + */ +unsigned rxrpc_max_call_lifetime = 60 * HZ; + +/* + * Time till dead call expires after last use (in jiffies). + */ +unsigned rxrpc_dead_call_expiry = 2 * HZ; + const char *const rxrpc_call_states[] = { [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", @@ -38,8 +48,6 @@ const char *const rxrpc_call_states[] = { struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static unsigned int rxrpc_call_max_lifetime = 60; -static unsigned int rxrpc_dead_call_timeout = 2; static void rxrpc_destroy_call(struct work_struct *work); static void rxrpc_call_life_expired(unsigned long _call); @@ -132,7 +140,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call( list_add(&call->error_link, &call->conn->trans->peer->error_targets); spin_unlock(&call->conn->trans->peer->lock); - call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ; + call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; add_timer(&call->lifetimer); _leave(" = %p", call); @@ -349,7 +357,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ; + call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; add_timer(&call->lifetimer); _leave(" = %p {%d} [new]", call, call->debug_id); return call; @@ -533,7 +541,7 @@ void rxrpc_release_call(struct rxrpc_call *call) del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ; + call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; add_timer(&call->deadspan); _leave(""); diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 7bf5b5b9e8b9..6631f4f1e39b 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -18,11 +18,15 @@ #include #include "ar-internal.h" +/* + * Time till a connection expires after last use (in seconds). + */ +unsigned rxrpc_connection_expiry = 10 * 60; + static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); DEFINE_RWLOCK(rxrpc_connection_lock); -static unsigned long rxrpc_connection_timeout = 10 * 60; static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); /* @@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) spin_lock(&conn->trans->client_lock); write_lock(&conn->trans->conn_lock); - reap_time = conn->put_time + rxrpc_connection_timeout; + reap_time = conn->put_time + rxrpc_connection_expiry; if (atomic_read(&conn->usage) > 0) { ; @@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void) { _enter(""); - rxrpc_connection_timeout = 0; + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index eb7e16276cc1..540c03d338c9 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -25,8 +25,6 @@ #include #include "ar-internal.h" -unsigned long rxrpc_ack_timeout = 1; - const char *rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5f43675ee1df..036e1dd84223 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -433,6 +433,10 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * ar-ack.c */ +extern unsigned rxrpc_requested_ack_delay; +extern unsigned rxrpc_soft_ack_delay; +extern unsigned rxrpc_idle_ack_delay; + void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); void rxrpc_process_call(struct work_struct *); @@ -440,6 +444,8 @@ void rxrpc_process_call(struct work_struct *); /* * ar-call.c */ +extern unsigned rxrpc_max_call_lifetime; +extern unsigned rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; @@ -460,6 +466,7 @@ void __exit rxrpc_destroy_all_calls(void); /* * ar-connection.c */ +extern unsigned rxrpc_connection_expiry; extern struct list_head rxrpc_connections; extern rwlock_t rxrpc_connection_lock; @@ -493,7 +500,6 @@ void rxrpc_UDP_error_handler(struct work_struct *); /* * ar-input.c */ -extern unsigned long rxrpc_ack_timeout; extern const char *rxrpc_pkts[]; void rxrpc_data_ready(struct sock *, int); @@ -504,6 +510,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); * ar-local.c */ extern rwlock_t rxrpc_local_lock; + struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *); void rxrpc_put_local(struct rxrpc_local *); void __exit rxrpc_destroy_all_locals(void); @@ -522,7 +529,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, /* * ar-output.c */ -extern int rxrpc_resend_timeout; +extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, @@ -572,6 +579,8 @@ void rxrpc_packet_destructor(struct sk_buff *); /* * ar-transport.c */ +extern unsigned rxrpc_transport_expiry; + struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, struct rxrpc_peer *, gfp_t); void rxrpc_put_transport(struct rxrpc_transport *); @@ -579,6 +588,17 @@ void __exit rxrpc_destroy_all_transports(void); struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *, struct rxrpc_peer *); +/* + * sysctl.c + */ +#ifdef CONFIG_SYSCTL +extern int __init rxrpc_sysctl_init(void); +extern void rxrpc_sysctl_exit(void); +#else +static inline int __init rxrpc_sysctl_init(void) { return 0; } +static inline void rxrpc_sysctl_exit(void) {} +#endif + /* * debug tracing */ diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index d0e8f1c1898a..4814d882bcb4 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -18,7 +18,10 @@ #include #include "ar-internal.h" -int rxrpc_resend_timeout = 4; +/* + * Time till packet resend (in jiffies). + */ +unsigned rxrpc_resend_timeout = 4 * HZ; static int rxrpc_send_data(struct kiocb *iocb, struct rxrpc_sock *rx, @@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout * HZ; + sp->resend_at = jiffies + rxrpc_resend_timeout; if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { _debug("run timer"); call->resend_timer.expires = sp->resend_at; diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c index de755e04d29c..af9f4fd2a365 100644 --- a/net/rxrpc/ar-skbuff.c +++ b/net/rxrpc/ar-skbuff.c @@ -83,6 +83,11 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, rxrpc_request_final_ACK(call); } else if (atomic_dec_and_test(&call->ackr_not_idle) && test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { + /* We previously soft-ACK'd some received packets that have now + * been consumed, so send a hard-ACK if no more packets are + * immediately forthcoming to allow the transmitter to free up + * its Tx bufferage. + */ _debug("send Rx idle ACK"); __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, true); diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 92df566930b9..1976dec84f29 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -17,11 +17,15 @@ #include #include "ar-internal.h" +/* + * Time after last use at which transport record is cleaned up. + */ +unsigned rxrpc_transport_expiry = 3600 * 24; + static void rxrpc_transport_reaper(struct work_struct *work); static LIST_HEAD(rxrpc_transports); static DEFINE_RWLOCK(rxrpc_transport_lock); -static unsigned long rxrpc_transport_timeout = 3600 * 24; static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper); /* @@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) if (likely(atomic_read(&trans->usage) > 0)) continue; - reap_time = trans->put_time + rxrpc_transport_timeout; + reap_time = trans->put_time + rxrpc_transport_expiry; if (reap_time <= now) list_move_tail(&trans->link, &graveyard); else if (reap_time < earliest) @@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void) { _enter(""); - rxrpc_transport_timeout = 0; + rxrpc_transport_expiry = 0; cancel_delayed_work(&rxrpc_transport_reap); rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c new file mode 100644 index 000000000000..cdc85e72af5d --- /dev/null +++ b/net/rxrpc/sysctl.c @@ -0,0 +1,113 @@ +/* sysctls for configuring RxRPC operating parameters + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include "ar-internal.h" + +static struct ctl_table_header *rxrpc_sysctl_reg_table; +static const unsigned zero = 0; +static const unsigned one = 1; + +/* + * RxRPC operating parameters. + * + * See Documentation/networking/rxrpc.txt and the variable definitions for more + * information on the individual parameters. + */ +static struct ctl_table rxrpc_sysctl_table[] = { + /* Values measured in milliseconds */ + { + .procname = "req_ack_delay", + .data = &rxrpc_requested_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&zero, + }, + { + .procname = "soft_ack_delay", + .data = &rxrpc_soft_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_ack_delay", + .data = &rxrpc_idle_ack_delay, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "resend_timeout", + .data = &rxrpc_resend_timeout, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + + /* Values measured in seconds but used in jiffies */ + { + .procname = "max_call_lifetime", + .data = &rxrpc_max_call_lifetime, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "dead_call_expiry", + .data = &rxrpc_dead_call_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + .extra1 = (void *)&one, + }, + + /* Values measured in seconds */ + { + .procname = "connection_expiry", + .data = &rxrpc_connection_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + }, + { + .procname = "transport_expiry", + .data = &rxrpc_transport_expiry, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + }, + { } +}; + +int __init rxrpc_sysctl_init(void) +{ + rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc", + rxrpc_sysctl_table); + if (!rxrpc_sysctl_reg_table) + return -ENOMEM; + return 0; +} + +void rxrpc_sysctl_exit(void) +{ + if (rxrpc_sysctl_reg_table) + unregister_net_sysctl_table(rxrpc_sysctl_reg_table); +} From 9823f39a1719dce0da8a47cdd5c66ff8831f03f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Feb 2014 18:58:45 +0000 Subject: [PATCH 5/8] af_rxrpc: Improve ACK production Improve ACK production by the following means: (1) Don't send an ACK_REQUESTED ack immediately even if the RXRPC_MORE_PACKETS flag isn't set on a data packet that has also has RXRPC_REQUEST_ACK set. MORE_PACKETS just means that the sender just emptied its Tx data buffer. More data will be forthcoming unless RXRPC_LAST_PACKET is also flagged. It is possible to see runs of DATA packets with MORE_PACKETS unset that aren't waiting for an ACK. It is therefore better to wait a small instant to see if we can combine an ACK for several packets. (2) Don't send an ACK_IDLE ack immediately unless we're responding to the terminal data packet of a call. Whilst sending an ACK_IDLE mid-call serves to let the other side know that we won't be asking it to resend certain Tx buffers and that it can discard them, spamming it with loads of acks just because we've temporarily run out of data just distracts it. (3) Put the ACK_IDLE ack generation timeout up to half a second rather than a single jiffy. Just because we haven't been given more data immediately doesn't mean that more isn't forthcoming. The other side may be busily finding the data to send to us. Signed-off-by: David Howells --- net/rxrpc/ar-ack.c | 2 +- net/rxrpc/ar-input.c | 3 +-- net/rxrpc/ar-skbuff.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index 732b82f540c5..1a490d976e7e 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -41,7 +41,7 @@ unsigned rxrpc_soft_ack_delay = 1 * HZ; * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned rxrpc_idle_ack_delay = 1; +unsigned rxrpc_idle_ack_delay = 0.5 * HZ; static const char *rxrpc_acks(u8 reason) { diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 540c03d338c9..e449c675c36a 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -347,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) * it */ if (sp->hdr.flags & RXRPC_REQUEST_ACK) { _proto("ACK Requested on %%%u", serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, - !(sp->hdr.flags & RXRPC_MORE_PACKETS)); + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); } switch (sp->hdr.type) { diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c index af9f4fd2a365..4cfab49e329d 100644 --- a/net/rxrpc/ar-skbuff.c +++ b/net/rxrpc/ar-skbuff.c @@ -90,7 +90,7 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, */ _debug("send Rx idle ACK"); __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - true); + false); } spin_unlock_bh(&call->lock); From 817913d8cd7627d9303bce97c3c339ceb0f8e199 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Feb 2014 18:10:30 +0000 Subject: [PATCH 6/8] af_rxrpc: Expose more RxRPC parameters via sysctls Expose RxRPC parameters via sysctls to control the Rx window size, the Rx MTU maximum size and the number of packets that can be glued into a jumbo packet. More info added to Documentation/networking/rxrpc.txt. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 19 +++++++++++++++++ net/rxrpc/ar-ack.c | 26 ++++++++++++++++++++--- net/rxrpc/ar-call.c | 2 +- net/rxrpc/ar-internal.h | 3 +++ net/rxrpc/sysctl.c | 33 ++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index aa08d2625f05..16a924c486bf 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -926,3 +926,22 @@ adjusted through sysctls in /proc/net/rxrpc/: The amount of time in seconds after a transport was last used before we remove it from the transport list. Whilst a transport is in existence, it serves to anchor the peer data and keeps the connection ID counter. + + (*) rxrpc_rx_window_size + + The size of the receive window in packets. This is the maximum number of + unconsumed received packets we're willing to hold in memory for any + particular call. + + (*) rxrpc_rx_mtu + + The maximum packet MTU size that we're willing to receive in bytes. This + indicates to the peer whether we're willing to accept jumbo packets. + + (*) rxrpc_rx_jumbo_max + + The maximum number of packets that we're willing to accept in a jumbo + packet. Non-terminal packets in a jumbo packet must contain a four byte + header plus exactly 1412 bytes of data. The terminal packet must contain + a four byte header plus any amount of data. In any event, a jumbo packet + may not exceed rxrpc_rx_mtu in size. diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index 1a490d976e7e..c6be17a959a6 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -43,6 +43,26 @@ unsigned rxrpc_soft_ack_delay = 1 * HZ; */ unsigned rxrpc_idle_ack_delay = 0.5 * HZ; +/* + * Receive window size in packets. This indicates the maximum number of + * unconsumed received packets we're willing to retain in memory. Once this + * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further + * packets. + */ +unsigned rxrpc_rx_window_size = 32; + +/* + * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet + * made by gluing normal packets together that we're willing to handle. + */ +unsigned rxrpc_rx_mtu = 5692; + +/* + * The maximum number of fragments in a received jumbo packet that we tell the + * sender that we're willing to handle. + */ +unsigned rxrpc_rx_jumbo_max = 4; + static const char *rxrpc_acks(u8 reason) { static const char *const str[] = { @@ -1195,11 +1215,11 @@ send_ACK: mtu = call->conn->trans->peer->if_mtu; mtu -= call->conn->trans->peer->hdrsize; ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(32); + ackinfo.rwind = htonl(rxrpc_rx_window_size); /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(5692); - ackinfo.jumbo_max = htonl(4); + ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index 1e0903a2a0db..6e4d58c9b042 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -99,7 +99,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) call->rx_data_expect = 1; call->rx_data_eaten = 0; call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS; + call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; call->creation_jif = jiffies; return call; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 036e1dd84223..1ecd070e9149 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -436,6 +436,9 @@ int rxrpc_reject_call(struct rxrpc_sock *); extern unsigned rxrpc_requested_ack_delay; extern unsigned rxrpc_soft_ack_delay; extern unsigned rxrpc_idle_ack_delay; +extern unsigned rxrpc_rx_window_size; +extern unsigned rxrpc_rx_mtu; +extern unsigned rxrpc_rx_jumbo_max; void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index cdc85e72af5d..50a98a910eb1 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -17,6 +17,9 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table; static const unsigned zero = 0; static const unsigned one = 1; +static const unsigned four = 4; +static const unsigned n_65535 = 65535; +static const unsigned n_max_acks = RXRPC_MAXACKS; /* * RxRPC operating parameters. @@ -94,6 +97,36 @@ static struct ctl_table rxrpc_sysctl_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&one, }, + + /* Non-time values */ + { + .procname = "rx_window_size", + .data = &rxrpc_rx_window_size, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&n_max_acks, + }, + { + .procname = "rx_mtu", + .data = &rxrpc_rx_mtu, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra1 = (void *)&n_65535, + }, + { + .procname = "rx_jumbo_max", + .data = &rxrpc_rx_jumbo_max, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&four, + }, + { } }; From e8388eb10371745627d1e538e018cb10ded86aa7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Feb 2014 20:05:32 +0000 Subject: [PATCH 7/8] af_rxrpc: Request an ACK for every alternate DATA packet Set the RxRPC header flag to request an ACK packet for every odd-numbered DATA packet unless it's the last one (which implicitly requests an ACK anyway). This is similar to how librx appears to work. If we don't do this, we'll send out a full window of packets and then just sit there until the other side gets bored and sends an ACK to indicate that it's been idle for a while and has received no new packets. Requesting a lot of ACKs shouldn't be a problem as ACKs should be merged when possible. As AF_RXRPC currently works, it will schedule an ACK to be generated upon receipt of a DATA packet with the ACK-request packet set - and in the time taken to schedule this in a work queue, several other packets are likely to arrive and then all get ACK'd together. Signed-off-by: David Howells --- net/rxrpc/ar-output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 4814d882bcb4..0b4b9a79f5ab 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -669,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb, /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (segment == 0 && !more)) { struct rxrpc_connection *conn = call->conn; + uint32_t seq; size_t pad; /* pad out if we're using security */ @@ -681,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb, memset(skb_put(skb, pad), 0, pad); } + seq = atomic_inc_return(&call->sequence); + sp->hdr.epoch = conn->epoch; sp->hdr.cid = call->cid; sp->hdr.callNumber = call->call_id; - sp->hdr.seq = - htonl(atomic_inc_return(&call->sequence)); + sp->hdr.seq = htonl(seq); sp->hdr.serial = htonl(atomic_inc_return(&conn->serial)); sp->hdr.type = RXRPC_PACKET_TYPE_DATA; @@ -700,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb, else if (CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz) > 1) sp->hdr.flags |= RXRPC_MORE_PACKETS; + if (more && seq & 1) + sp->hdr.flags |= RXRPC_REQUEST_ACK; ret = rxrpc_secure_packet( call, skb, skb->mark, From 7727640cc3c4d03b6a3cb5bf26d48c72e31403ca Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Mon, 3 Mar 2014 23:04:45 +0000 Subject: [PATCH 8/8] af_rxrpc: Keep rxrpc_call pointers in a hashtable Keep track of rxrpc_call structures in a hashtable so they can be found directly from the network parameters which define the call. This allows incoming packets to be routed directly to a call without walking through hierarchy of peer -> transport -> connection -> call and all the spinlocks that that entailed. Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-call.c | 193 +++++++++++++++++++++++++++++++++++++++- net/rxrpc/ar-input.c | 177 +++++++++++++++--------------------- net/rxrpc/ar-internal.h | 13 +++ 3 files changed, 277 insertions(+), 106 deletions(-) diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index 6e4d58c9b042..a9e05db0f5d5 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include "ar-internal.h" @@ -55,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); +static DEFINE_SPINLOCK(rxrpc_call_hash_lock); +static DEFINE_HASHTABLE(rxrpc_call_hash, 10); + +/* + * Hash function for rxrpc_call_hash + */ +static unsigned long rxrpc_call_hashfunc( + u8 clientflag, + __be32 cid, + __be32 call_id, + __be32 epoch, + __be16 service_id, + sa_family_t proto, + void *localptr, + unsigned int addr_size, + const u8 *peer_addr) +{ + const u16 *p; + unsigned int i; + unsigned long key; + u32 hcid = ntohl(cid); + + _enter(""); + + key = (unsigned long)localptr; + /* We just want to add up the __be32 values, so forcing the + * cast should be okay. + */ + key += (__force u32)epoch; + key += (__force u16)service_id; + key += (__force u32)call_id; + key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT; + key += hcid & RXRPC_CHANNELMASK; + key += clientflag; + key += proto; + /* Step through the peer address in 16-bit portions for speed */ + for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++) + key += *p; + _leave(" key = 0x%lx", key); + return key; +} + +/* + * Add a call to the hashtable + */ +static void rxrpc_call_hash_add(struct rxrpc_call *call) +{ + unsigned long key; + unsigned int addr_size = 0; + + _enter(""); + switch (call->proto) { + case AF_INET: + addr_size = sizeof(call->peer_ip.ipv4_addr); + break; + case AF_INET6: + addr_size = sizeof(call->peer_ip.ipv6_addr); + break; + default: + break; + } + key = rxrpc_call_hashfunc(call->in_clientflag, call->cid, + call->call_id, call->epoch, + call->service_id, call->proto, + call->conn->trans->local, addr_size, + call->peer_ip.ipv6_addr); + /* Store the full key in the call */ + call->hash_key = key; + spin_lock(&rxrpc_call_hash_lock); + hash_add_rcu(rxrpc_call_hash, &call->hash_node, key); + spin_unlock(&rxrpc_call_hash_lock); + _leave(""); +} + +/* + * Remove a call from the hashtable + */ +static void rxrpc_call_hash_del(struct rxrpc_call *call) +{ + _enter(""); + spin_lock(&rxrpc_call_hash_lock); + hash_del_rcu(&call->hash_node); + spin_unlock(&rxrpc_call_hash_lock); + _leave(""); +} + +/* + * Find a call in the hashtable and return it, or NULL if it + * isn't there. + */ +struct rxrpc_call *rxrpc_find_call_hash( + u8 clientflag, + __be32 cid, + __be32 call_id, + __be32 epoch, + __be16 service_id, + void *localptr, + sa_family_t proto, + const u8 *peer_addr) +{ + unsigned long key; + unsigned int addr_size = 0; + struct rxrpc_call *call = NULL; + struct rxrpc_call *ret = NULL; + + _enter(""); + switch (proto) { + case AF_INET: + addr_size = sizeof(call->peer_ip.ipv4_addr); + break; + case AF_INET6: + addr_size = sizeof(call->peer_ip.ipv6_addr); + break; + default: + break; + } + + key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch, + service_id, proto, localptr, addr_size, + peer_addr); + hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) { + if (call->hash_key == key && + call->call_id == call_id && + call->cid == cid && + call->in_clientflag == clientflag && + call->service_id == service_id && + call->proto == proto && + call->local == localptr && + memcmp(call->peer_ip.ipv6_addr, peer_addr, + addr_size) == 0 && + call->epoch == epoch) { + ret = call; + break; + } + } + _leave(" = %p", ret); + return ret; +} + /* * allocate a new call */ @@ -136,6 +277,26 @@ static struct rxrpc_call *rxrpc_alloc_client_call( return ERR_PTR(ret); } + /* Record copies of information for hashtable lookup */ + call->proto = rx->proto; + call->local = trans->local; + switch (call->proto) { + case AF_INET: + call->peer_ip.ipv4_addr = + trans->peer->srx.transport.sin.sin_addr.s_addr; + break; + case AF_INET6: + memcpy(call->peer_ip.ipv6_addr, + trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8, + sizeof(call->peer_ip.ipv6_addr)); + break; + } + call->epoch = call->conn->epoch; + call->service_id = call->conn->service_id; + call->in_clientflag = call->conn->in_clientflag; + /* Add the new call to the hashtable */ + rxrpc_call_hash_add(call); + spin_lock(&call->conn->trans->peer->lock); list_add(&call->error_link, &call->conn->trans->peer->error_targets); spin_unlock(&call->conn->trans->peer->lock); @@ -328,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, parent = *p; call = rb_entry(parent, struct rxrpc_call, conn_node); - if (call_id < call->call_id) + /* The tree is sorted in order of the __be32 value without + * turning it into host order. + */ + if ((__force u32)call_id < (__force u32)call->call_id) p = &(*p)->rb_left; - else if (call_id > call->call_id) + else if ((__force u32)call_id > (__force u32)call->call_id) p = &(*p)->rb_right; else goto old_call; @@ -355,6 +519,28 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock_bh(&rxrpc_call_lock); + /* Record copies of information for hashtable lookup */ + call->proto = rx->proto; + call->local = conn->trans->local; + switch (call->proto) { + case AF_INET: + call->peer_ip.ipv4_addr = + conn->trans->peer->srx.transport.sin.sin_addr.s_addr; + break; + case AF_INET6: + memcpy(call->peer_ip.ipv6_addr, + conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8, + sizeof(call->peer_ip.ipv6_addr)); + break; + default: + break; + } + call->epoch = conn->epoch; + call->service_id = conn->service_id; + call->in_clientflag = conn->in_clientflag; + /* Add the new call to the hashtable */ + rxrpc_call_hash_add(call); + _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; @@ -673,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_put_connection(call->conn); } + /* Remove the call from the hash */ + rxrpc_call_hash_del(call); + if (call->acks_window) { _debug("kill Tx window %d", CIRC_CNT(call->acks_head, call->acks_tail, diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index e449c675c36a..73742647c135 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -523,36 +523,38 @@ protocol_error: * post an incoming packet to the appropriate call/socket to deal with * - must get rid of the sk_buff, either by freeing it or by queuing it */ -static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, +static void rxrpc_post_packet_to_call(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp; - struct rxrpc_call *call; - struct rb_node *p; - __be32 call_id; - _enter("%p,%p", conn, skb); - - read_lock_bh(&conn->lock); + _enter("%p,%p", call, skb); sp = rxrpc_skb(skb); - /* look at extant calls by channel number first */ - call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK]; - if (!call || call->call_id != sp->hdr.callNumber) - goto call_not_extant; - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); read_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) + if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) { rxrpc_queue_call(call); + goto free_unlock; + } case RXRPC_CALL_REMOTELY_ABORTED: case RXRPC_CALL_NETWORK_ERROR: case RXRPC_CALL_DEAD: + goto dead_call; + case RXRPC_CALL_COMPLETE: + case RXRPC_CALL_CLIENT_FINAL_ACK: + /* complete server call */ + if (call->conn->in_clientflag) + goto dead_call; + /* resend last packet of a completed call */ + _debug("final ack again"); + rxrpc_get_call(call); + set_bit(RXRPC_CALL_ACK_FINAL, &call->events); + rxrpc_queue_call(call); goto free_unlock; default: break; @@ -560,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, read_unlock(&call->state_lock); rxrpc_get_call(call); - read_unlock_bh(&conn->lock); if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.flags & RXRPC_JUMBO_PACKET) @@ -571,80 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, rxrpc_put_call(call); goto done; -call_not_extant: - /* search the completed calls in case what we're dealing with is - * there */ - _debug("call not extant"); - - call_id = sp->hdr.callNumber; - p = conn->calls.rb_node; - while (p) { - call = rb_entry(p, struct rxrpc_call, conn_node); - - if (call_id < call->call_id) - p = p->rb_left; - else if (call_id > call->call_id) - p = p->rb_right; - else - goto found_completed_call; - } - dead_call: - /* it's a either a really old call that we no longer remember or its a - * new incoming call */ - read_unlock_bh(&conn->lock); - - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.seq == cpu_to_be32(1)) { - _debug("incoming call"); - skb_queue_tail(&conn->trans->local->accept_queue, skb); - rxrpc_queue_work(&conn->trans->local->acceptor); - goto done; - } - - _debug("dead call"); if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->trans->local, skb); + rxrpc_reject_packet(call->conn->trans->local, skb); + goto unlock; } - goto done; - - /* resend last packet of a completed call - * - client calls may have been aborted or ACK'd - * - server calls may have been aborted - */ -found_completed_call: - _debug("completed call"); - - if (atomic_read(&call->usage) == 0) - goto dead_call; - - /* synchronise any state changes */ - read_lock(&call->state_lock); - ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK, - call->state, >=, RXRPC_CALL_COMPLETE); - - if (call->state == RXRPC_CALL_LOCALLY_ABORTED || - call->state == RXRPC_CALL_REMOTELY_ABORTED || - call->state == RXRPC_CALL_DEAD) { - read_unlock(&call->state_lock); - goto dead_call; - } - - if (call->conn->in_clientflag) { - read_unlock(&call->state_lock); - goto dead_call; /* complete server call */ - } - - _debug("final ack again"); - rxrpc_get_call(call); - set_bit(RXRPC_CALL_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - free_unlock: - read_unlock(&call->state_lock); - read_unlock_bh(&conn->lock); rxrpc_free_skb(skb); +unlock: + read_unlock(&call->state_lock); done: _leave(""); } @@ -663,17 +600,42 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, rxrpc_queue_conn(conn); } +static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local, + struct sk_buff *skb, + struct rxrpc_skb_priv *sp) +{ + struct rxrpc_peer *peer; + struct rxrpc_transport *trans; + struct rxrpc_connection *conn; + + peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, + udp_hdr(skb)->source); + if (IS_ERR(peer)) + goto cant_find_conn; + + trans = rxrpc_find_transport(local, peer); + rxrpc_put_peer(peer); + if (!trans) + goto cant_find_conn; + + conn = rxrpc_find_connection(trans, &sp->hdr); + rxrpc_put_transport(trans); + if (!conn) + goto cant_find_conn; + + return conn; +cant_find_conn: + return NULL; +} + /* * handle data received on the local endpoint * - may be called in interrupt context */ void rxrpc_data_ready(struct sock *sk, int count) { - struct rxrpc_connection *conn; - struct rxrpc_transport *trans; struct rxrpc_skb_priv *sp; struct rxrpc_local *local; - struct rxrpc_peer *peer; struct sk_buff *skb; int ret; @@ -748,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count) (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) goto bad_message; - peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source); - if (IS_ERR(peer)) - goto cant_route_call; + if (sp->hdr.callNumber == 0) { + /* This is a connection-level packet. These should be + * fairly rare, so the extra overhead of looking them up the + * old-fashioned way doesn't really hurt */ + struct rxrpc_connection *conn; - trans = rxrpc_find_transport(local, peer); - rxrpc_put_peer(peer); - if (!trans) - goto cant_route_call; + conn = rxrpc_conn_from_local(local, skb, sp); + if (!conn) + goto cant_route_call; - conn = rxrpc_find_connection(trans, &sp->hdr); - rxrpc_put_transport(trans); - if (!conn) - goto cant_route_call; - - _debug("CONN %p {%d}", conn, conn->debug_id); - - if (sp->hdr.callNumber == 0) + _debug("CONN %p {%d}", conn, conn->debug_id); rxrpc_post_packet_to_conn(conn, skb); - else - rxrpc_post_packet_to_call(conn, skb); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn); + } else { + struct rxrpc_call *call; + u8 in_clientflag = 0; + + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + in_clientflag = RXRPC_CLIENT_INITIATED; + call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid, + sp->hdr.callNumber, sp->hdr.epoch, + sp->hdr.serviceId, local, AF_INET, + (u8 *)&ip_hdr(skb)->saddr); + if (call) + rxrpc_post_packet_to_call(call, skb); + else + goto cant_route_call; + } rxrpc_put_local(local); return; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1ecd070e9149..c831d44b0841 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -396,9 +396,20 @@ struct rxrpc_call { #define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + struct hlist_node hash_node; + unsigned long hash_key; /* Full hash key */ + u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */ + struct rxrpc_local *local; /* Local endpoint. Used for hashing. */ + sa_family_t proto; /* Frame protocol */ /* the following should all be in net order */ __be32 cid; /* connection ID + channel index */ __be32 call_id; /* call ID on connection */ + __be32 epoch; /* epoch of this connection */ + __be16 service_id; /* service ID */ + union { /* Peer IP address for hashing */ + __be32 ipv4_addr; + __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */ + } peer_ip; }; /* @@ -453,6 +464,8 @@ extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; +struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32, + __be16, void *, sa_family_t, const u8 *); struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, struct rxrpc_transport *, struct rxrpc_conn_bundle *,