remarkable-linux/net/rxrpc/ar-error.c
David Howells 6c9a2d3202 af_rxrpc: Fix UDP MTU calculation from ICMP_FRAG_NEEDED
AF_RXRPC sends UDP packets with the "Don't Fragment" bit set in an attempt to
determine the maximum packet size between the local socket and the peer by
invoking the generation of ICMP_FRAG_NEEDED packets.

Once a packet is sent with the "Don't Fragment" bit set, it is then
inconvenient to break it up as that requires recalculating all the rxrpc serial
and sequence numbers and reencrypting all the fragments, so we switch off the
"Don't Fragment" service temporarily and send the bounced packet again.  Future
packets then use the new MTU.

That's all fine.  The problem lies in rxrpc_UDP_error_report() where the code
that deals with ICMP_FRAG_NEEDED packets lives.  Packets of this type have a
field (ee_info) to indicate the maximum packet size at the reporting node - but
sometimes ee_info isn't filled in and is just left as 0 and the code must allow
for this.

When ee_info is 0, the code should take the MTU size we're currently using and
reduce it for the next packet we want to send.  However, it takes ee_info
(which is known to be 0) and tries to reduce that instead.

This was discovered by Coverity.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
2014-02-26 17:25:01 +00:00

249 lines
5.7 KiB
C

/* Error message handling (ICMP)
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
/*
* handle an error received on the local endpoint
*/
void rxrpc_UDP_error_report(struct sock *sk)
{
struct sock_exterr_skb *serr;
struct rxrpc_transport *trans;
struct rxrpc_local *local = sk->sk_user_data;
struct rxrpc_peer *peer;
struct sk_buff *skb;
__be32 addr;
__be16 port;
_enter("%p{%d}", sk, local->debug_id);
skb = skb_dequeue(&sk->sk_error_queue);
if (!skb) {
_leave("UDP socket errqueue empty");
return;
}
rxrpc_new_skb(skb);
serr = SKB_EXT_ERR(skb);
addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
port = serr->port;
_net("Rx UDP Error from %pI4:%hu", &addr, ntohs(port));
_debug("Msg l:%d d:%d", skb->len, skb->data_len);
peer = rxrpc_find_peer(local, addr, port);
if (IS_ERR(peer)) {
rxrpc_free_skb(skb);
_leave(" [no peer]");
return;
}
trans = rxrpc_find_transport(local, peer);
if (!trans) {
rxrpc_put_peer(peer);
rxrpc_free_skb(skb);
_leave(" [no trans]");
return;
}
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
serr->ee.ee_type == ICMP_DEST_UNREACH &&
serr->ee.ee_code == ICMP_FRAG_NEEDED
) {
u32 mtu = serr->ee.ee_info;
_net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
/* wind down the local interface MTU */
if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
peer->if_mtu = mtu;
_net("I/F MTU %u", mtu);
}
if (mtu == 0) {
/* they didn't give us a size, estimate one */
mtu = peer->if_mtu;
if (mtu > 1500) {
mtu >>= 1;
if (mtu < 1500)
mtu = 1500;
} else {
mtu -= 100;
if (mtu < peer->hdrsize)
mtu = peer->hdrsize + 4;
}
}
if (mtu < peer->mtu) {
spin_lock_bh(&peer->lock);
peer->mtu = mtu;
peer->maxdata = peer->mtu - peer->hdrsize;
spin_unlock_bh(&peer->lock);
_net("Net MTU %u (maxdata %u)",
peer->mtu, peer->maxdata);
}
}
rxrpc_put_peer(peer);
/* pass the transport ref to error_handler to release */
skb_queue_tail(&trans->error_queue, skb);
rxrpc_queue_work(&trans->error_handler);
/* reset and regenerate socket error */
spin_lock_bh(&sk->sk_error_queue.lock);
sk->sk_err = 0;
skb = skb_peek(&sk->sk_error_queue);
if (skb) {
sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
spin_unlock_bh(&sk->sk_error_queue.lock);
sk->sk_error_report(sk);
} else {
spin_unlock_bh(&sk->sk_error_queue.lock);
}
_leave("");
}
/*
* deal with UDP error messages
*/
void rxrpc_UDP_error_handler(struct work_struct *work)
{
struct sock_extended_err *ee;
struct sock_exterr_skb *serr;
struct rxrpc_transport *trans =
container_of(work, struct rxrpc_transport, error_handler);
struct sk_buff *skb;
int err;
_enter("");
skb = skb_dequeue(&trans->error_queue);
if (!skb)
return;
serr = SKB_EXT_ERR(skb);
ee = &serr->ee;
_net("Rx Error o=%d t=%d c=%d e=%d",
ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
err = ee->ee_errno;
switch (ee->ee_origin) {
case SO_EE_ORIGIN_ICMP:
switch (ee->ee_type) {
case ICMP_DEST_UNREACH:
switch (ee->ee_code) {
case ICMP_NET_UNREACH:
_net("Rx Received ICMP Network Unreachable");
err = ENETUNREACH;
break;
case ICMP_HOST_UNREACH:
_net("Rx Received ICMP Host Unreachable");
err = EHOSTUNREACH;
break;
case ICMP_PORT_UNREACH:
_net("Rx Received ICMP Port Unreachable");
err = ECONNREFUSED;
break;
case ICMP_FRAG_NEEDED:
_net("Rx Received ICMP Fragmentation Needed (%d)",
ee->ee_info);
err = 0; /* dealt with elsewhere */
break;
case ICMP_NET_UNKNOWN:
_net("Rx Received ICMP Unknown Network");
err = ENETUNREACH;
break;
case ICMP_HOST_UNKNOWN:
_net("Rx Received ICMP Unknown Host");
err = EHOSTUNREACH;
break;
default:
_net("Rx Received ICMP DestUnreach code=%u",
ee->ee_code);
break;
}
break;
case ICMP_TIME_EXCEEDED:
_net("Rx Received ICMP TTL Exceeded");
break;
default:
_proto("Rx Received ICMP error { type=%u code=%u }",
ee->ee_type, ee->ee_code);
break;
}
break;
case SO_EE_ORIGIN_LOCAL:
_proto("Rx Received local error { error=%d }",
ee->ee_errno);
break;
case SO_EE_ORIGIN_NONE:
case SO_EE_ORIGIN_ICMP6:
default:
_proto("Rx Received error report { orig=%u }",
ee->ee_origin);
break;
}
/* terminate all the affected calls if there's an unrecoverable
* error */
if (err) {
struct rxrpc_call *call, *_n;
_debug("ISSUE ERROR %d", err);
spin_lock_bh(&trans->peer->lock);
trans->peer->net_error = err;
list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
error_link) {
write_lock(&call->state_lock);
if (call->state != RXRPC_CALL_COMPLETE &&
call->state < RXRPC_CALL_NETWORK_ERROR) {
call->state = RXRPC_CALL_NETWORK_ERROR;
set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
rxrpc_queue_call(call);
}
write_unlock(&call->state_lock);
list_del_init(&call->error_link);
}
spin_unlock_bh(&trans->peer->lock);
}
if (!skb_queue_empty(&trans->error_queue))
rxrpc_queue_work(&trans->error_handler);
rxrpc_free_skb(skb);
rxrpc_put_transport(trans);
_leave("");
}