From b2155e7f70b3f058efe94c0c459db023b05057bd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 7 Feb 2008 17:54:56 -0800 Subject: [PATCH 01/21] [NETFILTER]: nf_conntrack: TCP conntrack reopening fix TCP connection tracking in netfilter did not handle TCP reopening properly: active close was taken into account for one side only and not for any side, which is fixed now. The patch includes more comments to explain the logic how the different cases are handled. The bug was discovered by Jeff Chua. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 32 +++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3e0cccae5636..202d7fa09483 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -125,7 +125,7 @@ enum tcp_bit_set { * CLOSE_WAIT: ACK seen (after FIN) * LAST_ACK: FIN seen (after FIN) * TIME_WAIT: last ACK seen - * CLOSE: closed connection + * CLOSE: closed connection (RST) * * LISTEN state is not used. * @@ -824,7 +824,21 @@ static int tcp_packet(struct nf_conn *ct, case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; - if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_CLOSE_INIT) + /* RFC 1122: "When a connection is closed actively, + * it MUST linger in TIME-WAIT state for a time 2xMSL + * (Maximum Segment Lifetime). However, it MAY accept + * a new SYN from the remote TCP to reopen the connection + * directly from TIME-WAIT state, if..." + * We ignore the conditions because we are in the + * TIME-WAIT state anyway. + * + * Handle aborted connections: we and the server + * think there is an existing connection but the client + * aborts it and starts a new one. 
+ */ + if (((ct->proto.tcp.seen[dir].flags + | ct->proto.tcp.seen[!dir].flags) + & IP_CT_TCP_FLAG_CLOSE_INIT) || (ct->proto.tcp.last_dir == dir && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. @@ -837,16 +851,23 @@ static int tcp_packet(struct nf_conn *ct, /* Fall through */ case TCP_CONNTRACK_IGNORE: /* Ignored packets: + * + * Our connection entry may be out of sync, so ignore + * packets which may signal the real connection between + * the client and the server. * * a) SYN in ORIGINAL * b) SYN/ACK in REPLY * c) ACK in reply direction after initial SYN in original. + * + * If the ignored packet is invalid, the receiver will send + * a RST we'll catch below. */ if (index == TCP_SYNACK_SET && ct->proto.tcp.last_index == TCP_SYN_SET && ct->proto.tcp.last_dir != dir && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { - /* This SYN/ACK acknowledges a SYN that we earlier + /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is * not. 
We kill this session and block the SYN/ACK so @@ -870,7 +891,7 @@ static int tcp_packet(struct nf_conn *ct, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid packed ignored "); + "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Invalid packet */ @@ -924,8 +945,7 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.state = new_state; if (old_state != new_state - && (new_state == TCP_CONNTRACK_FIN_WAIT - || new_state == TCP_CONNTRACK_CLOSE)) + && new_state == TCP_CONNTRACK_CLOSE) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans From 86577c661bc01d5c4e477d74567df4470d6c5138 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 7 Feb 2008 17:56:34 -0800 Subject: [PATCH 02/21] [NETFILTER]: nf_conntrack: fix ct_extend ->move operation The ->move operation has two bugs: - It is called with the same extension as source and destination, so it doesn't update the new extension. - The address of the old extension is calculated incorrectly, instead of (void *)ct->ext + ct->ext->offset[i] it uses ct->ext + ct->ext->offset[i]. Fixes a crash on x86_64 reported by Chuck Ebbert and Thomas Woerner . Tested-by: Thomas Woerner Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_extend.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 6 +++--- net/netfilter/nf_conntrack_extend.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 73b5711faf32..49aac6323fbe 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -67,7 +67,7 @@ struct nf_ct_ext_type void (*destroy)(struct nf_conn *ct); /* Called when realloacted (can be NULL). 
Contents has already been moved. */ - void (*move)(struct nf_conn *ct, void *old); + void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index dd07362d2b8f..0d5fa3a54d04 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -600,10 +600,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) spin_unlock_bh(&nf_nat_lock); } -static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) +static void nf_nat_move_storage(void *new, void *old) { - struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT); - struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old; + struct nf_conn_nat *new_nat = new; + struct nf_conn_nat *old_nat = old; struct nf_conn *ct = old_nat->ct; if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index cf6ba6659a80..8b9be1e978cd 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -109,7 +109,8 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) - t->move(ct, ct->ext + ct->ext->offset[i]); + t->move((void *)new + new->offset[i], + (void *)ct->ext + ct->ext->offset[i]); rcu_read_unlock(); } kfree(ct->ext); From d9d17578d9f11cdbe41e4559e8f264ec757ddce8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 7 Feb 2008 17:56:49 -0800 Subject: [PATCH 03/21] [NETFILTER]: xt_iprange: fix typo in address family The family for iprange_mt4 should be AF_INET, not AF_INET6. Noticed by Jiri Moravec . Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/xt_iprange.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 01035fc0e140..97715e31c016 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -148,7 +148,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 1, - .family = AF_INET6, + .family = AF_INET, .match = iprange_mt4, .matchsize = sizeof(struct xt_iprange_mtinfo), .me = THIS_MODULE, From 5da621f1c514b8a39c6f7112becb97262ae76900 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 7 Feb 2008 17:57:11 -0800 Subject: [PATCH 04/21] [NETFILTER]: xt_iprange: add missing #include Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_iprange.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 97715e31c016..4f984dc60319 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -13,6 +13,7 @@ #include #include #include +#include #include static bool From 4136cd523eb0c0bd53173e16fd7406d31d05824f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 7 Feb 2008 17:58:20 -0800 Subject: [PATCH 05/21] [IPV4]: route: fix crash ip_route_input ip_route_me_harder() may call ip_route_input() with skbs that don't have skb->dev set for skbs rerouted in LOCAL_OUT and TCP resets generated by the REJECT target, resulting in a crash when dereferencing skb->dev->nd_net. Since ip_route_input() has an input device argument, it seems correct to use that one anyway. Bug introduced in b5921910a1 (Routing cache virtualization). Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8842ecb9be48..525787b52b72 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2041,7 +2041,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, int iif = dev->ifindex; struct net *net; - net = skb->dev->nd_net; + net = dev->nd_net; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); From 5f58a5c8725b48f3e32851f9748527c8d1ff71b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Feb 2008 18:03:18 -0800 Subject: [PATCH 06/21] [IPSEC] flow: Remove an unnecessary ____cacheline_aligned We use a percpu variable named flow_hash_info, which holds 12 bytes. It is currently marked as ____cacheline_aligned, which makes the linker skip space to properly align this variable. Before : c065cc90 D per_cpu__softnet_data c065cd00 d per_cpu__flow_tables c065cd80 d per_cpu__flow_hash_info c065ce00 d per_cpu__flow_flush_tasklets c065ce14 d per_cpu__rt_cache_stat This alignment is quite unproductive, and removing it reduces the size of percpu data (by 240 bytes on my x86 machine), and improves performance (flow_tables & flow_hash_info can share a single cache line) After patch : c065cc04 D per_cpu__softnet_data c065cc4c d per_cpu__flow_tables c065cc50 d per_cpu__flow_hash_info c065cc5c d per_cpu__flow_flush_tasklets c065cc70 d per_cpu__rt_cache_stat Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow.c b/net/core/flow.c index 46b38e06e0d7..9cfe84571ca5 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -52,7 +52,7 @@ struct flow_percpu_info { int hash_rnd_recalc; u32 hash_rnd; int count; -} ____cacheline_aligned; +}; static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; #define flow_hash_rnd_recalc(cpu) \ From 5423dd67bd0108a180784c6f307646622e804c9b Mon Sep 17 00:00:00 2001 From: Urs Thuermann Date: Thu, 7 Feb 2008 18:04:21 -0800 Subject: [PATCH 07/21] [CAN]: Clean up module auto loading Remove local char array to construct module name. Don't call request_module() when CONFIG_KMOD is not set. Signed-off-by: Urs Thuermann Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 5158e886630f..1f51b8a18242 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -118,7 +118,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct can_proto *cp; - char module_name[sizeof("can-proto-000")]; int err = 0; sock->state = SS_UNCONNECTED; @@ -129,26 +128,21 @@ static int can_create(struct net *net, struct socket *sock, int protocol) if (net != &init_net) return -EAFNOSUPPORT; +#ifdef CONFIG_KMOD /* try to load protocol module, when CONFIG_KMOD is defined */ if (!proto_tab[protocol]) { - sprintf(module_name, "can-proto-%d", protocol); - err = request_module(module_name); + err = request_module("can-proto-%d", protocol); /* * In case of error we only print a message but don't * return the error code immediately. 
Below we will * return -EPROTONOSUPPORT */ - if (err == -ENOSYS) { - if (printk_ratelimit()) - printk(KERN_INFO "can: request_module(%s)" - " not implemented.\n", module_name); - } else if (err) { - if (printk_ratelimit()) - printk(KERN_ERR "can: request_module(%s)" - " failed.\n", module_name); - } + if (err && printk_ratelimit()) + printk(KERN_ERR "can: request_module " + "(can-proto-%d) failed.\n", protocol); } +#endif spin_lock(&proto_tab_lock); cp = proto_tab[protocol]; From a2fea5f19f970b45e854c22cab25250a79613643 Mon Sep 17 00:00:00 2001 From: Urs Thuermann Date: Thu, 7 Feb 2008 18:04:45 -0800 Subject: [PATCH 08/21] [CAN]: Move proto_{,un}register() out of spin-locked region The implementation of proto_register() has changed so that it can now sleep. The call to proto_register() must be moved out of the spin-locked region. Signed-off-by: Urs Thuermann Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 1f51b8a18242..36b9f22ed83a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -656,27 +656,27 @@ int can_proto_register(struct can_proto *cp) return -EINVAL; } + err = proto_register(cp->prot, 0); + if (err < 0) + return err; + spin_lock(&proto_tab_lock); if (proto_tab[proto]) { printk(KERN_ERR "can: protocol %d already registered\n", proto); err = -EBUSY; - goto errout; + } else { + proto_tab[proto] = cp; + + /* use generic ioctl function if not defined by module */ + if (!cp->ops->ioctl) + cp->ops->ioctl = can_ioctl; } - - err = proto_register(cp->prot, 0); - if (err < 0) - goto errout; - - proto_tab[proto] = cp; - - /* use generic ioctl function if the module doesn't bring its own */ - if (!cp->ops->ioctl) - cp->ops->ioctl = can_ioctl; - - errout: spin_unlock(&proto_tab_lock); + if (err < 0) + proto_unregister(cp->prot); + return err; } EXPORT_SYMBOL(can_proto_register); @@ -694,9 
+694,10 @@ void can_proto_unregister(struct can_proto *cp) printk(KERN_ERR "BUG: can: protocol %d is not registered\n", proto); } - proto_unregister(cp->prot); proto_tab[proto] = NULL; spin_unlock(&proto_tab_lock); + + proto_unregister(cp->prot); } EXPORT_SYMBOL(can_proto_unregister); From a219994bf5cca1208fb741b20ea9eb78e1711f81 Mon Sep 17 00:00:00 2001 From: Urs Thuermann Date: Thu, 7 Feb 2008 18:05:04 -0800 Subject: [PATCH 09/21] [CAN]: Minor clean-ups Remove unneeded variable. Rename local variable error to err like in all other places. Some white-space changes. Signed-off-by: Urs Thuermann Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index aeefd1419d00..94cd7f27c444 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -98,7 +98,6 @@ static void raw_rcv(struct sk_buff *skb, void *data) struct sock *sk = (struct sock *)data; struct raw_sock *ro = raw_sk(sk); struct sockaddr_can *addr; - int error; if (!ro->recv_own_msgs) { /* check the received tx sock reference */ @@ -121,14 +120,12 @@ static void raw_rcv(struct sk_buff *skb, void *data) addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; - error = sock_queue_rcv_skb(sk, skb); - if (error < 0) + if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); } static int raw_enable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, - int count) + struct can_filter *filter, int count) { int err = 0; int i; @@ -163,8 +160,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, } static void raw_disable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, - int count) + struct can_filter *filter, int count) { int i; @@ -353,7 +349,6 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) /* filters set by default/setsockopt */ err = raw_enable_allfilters(dev, sk); 
dev_put(dev); - } else { ifindex = 0; @@ -466,7 +461,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (err) { if (count > 1) kfree(filter); - goto out_fil; } @@ -673,25 +667,25 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct sk_buff *skb; - int error = 0; + int err = 0; int noblock; noblock = flags & MSG_DONTWAIT; flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &error); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) - return error; + return err; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; - error = memcpy_toiovec(msg->msg_iov, skb->data, size); - if (error < 0) { + err = memcpy_toiovec(msg->msg_iov, skb->data, size); + if (err < 0) { skb_free_datagram(sk, skb); - return error; + return err; } sock_recv_timestamp(msg, sk, skb); From 435bc9dfc6927eed9465e297d7aca1217aa61956 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2008 18:06:52 -0800 Subject: [PATCH 10/21] [IUCV]: wrong irq-disabling locking at module load time Linux may hang when running af_iucv socket programs concurrently with a load of module netiucv. iucv_register() tries to take the iucv_table_lock with spin_lock_irq. This conflicts with iucv_connect() which has a need for an smp_call_function while holding the iucv_table_lock. Solution: use bh-disabling locking in iucv_register() Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f13fe8821cbd..2753b0c448f3 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -693,9 +693,9 @@ int iucv_register(struct iucv_handler *handler, int smp) iucv_setmask_up(); INIT_LIST_HEAD(&handler->paths); - spin_lock_irq(&iucv_table_lock); + spin_lock_bh(&iucv_table_lock); list_add_tail(&handler->list, &iucv_handler_list); - spin_unlock_irq(&iucv_table_lock); + spin_unlock_bh(&iucv_table_lock); rc = 0; out_mutex: mutex_unlock(&iucv_register_mutex); From d44447229e35115675d166b51a52e512c281475c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2008 18:07:19 -0800 Subject: [PATCH 11/21] [AF_IUCV]: broken send_skb_q results in endless loop A race has been detected in iucv_callback_txdone(). skb_unlink has to be done inside the locked area. In addition checkings for successful allocations are inserted. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2255e3c082ed..b3f5f840d067 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -482,6 +482,10 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, /* Create path. 
*/ iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, IPRMDATA, GFP_KERNEL); + if (!iucv->path) { + err = -ENOMEM; + goto done; + } err = iucv_path_connect(iucv->path, &af_iucv_handler, sa->siucv_user_id, NULL, user_data, sk); if (err) { @@ -1094,6 +1098,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) save_message: save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); + if (!save_msg) + return; save_msg->path = path; save_msg->msg = *msg; @@ -1118,10 +1124,10 @@ static void iucv_callback_txdone(struct iucv_path *path, this = list_skb; list_skb = list_skb->next; } while (memcmp(&msg->tag, this->cb, 4) && list_skb); + __skb_unlink(this, list); spin_unlock_irqrestore(&list->lock, flags); - skb_unlink(this, &iucv_sk(sk)->send_skb_q); kfree_skb(this); } From f2a77991a918218be4a3ac78250e7eba2282be59 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 7 Feb 2008 18:07:44 -0800 Subject: [PATCH 12/21] [AF_IUCV]: defensive programming of iucv_callback_txdone The loop in iucv_callback_txdone presumes existence of an entry with msg->tag in the send_skb_q list. In error cases this assumption might be wrong and might cause an endless loop. Loop is rewritten to guarantee loop end in case of missing msg->tag entry in send_skb_q. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index b3f5f840d067..fee22caf1bad 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1112,24 +1112,31 @@ static void iucv_callback_txdone(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; - struct sk_buff *this; + struct sk_buff *this = NULL; struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q; struct sk_buff *list_skb = list->next; unsigned long flags; - if (list_skb) { + if (!skb_queue_empty(list)) { spin_lock_irqsave(&list->lock, flags); - do { - this = list_skb; + while (list_skb != (struct sk_buff *)list) { + if (!memcmp(&msg->tag, list_skb->cb, 4)) { + this = list_skb; + break; + } list_skb = list_skb->next; - } while (memcmp(&msg->tag, this->cb, 4) && list_skb); - __skb_unlink(this, list); + } + if (this) + __skb_unlink(this, list); spin_unlock_irqrestore(&list->lock, flags); - kfree_skb(this); + if (this) + kfree_skb(this); } + if (!this) + printk(KERN_ERR "AF_IUCV msg tag %u not found\n", msg->tag); if (sk->sk_state == IUCV_CLOSING) { if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { From bba536a3d5809c88313849fb49d24d9e0f57e0bf Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 7 Feb 2008 18:10:19 -0800 Subject: [PATCH 13/21] [IPV6] Minor clenup: remove two unused definitions in net/ip6_route.h Remove IP6_RT_PRIO_FW and IP6_RT_FLOW_MASK definitions in include/net/ip6_route.h, as they are not used in the kernel. Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- include/net/ip6_route.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index faac0eee1ef3..f99e4f0f568f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -1,11 +1,9 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H -#define IP6_RT_PRIO_FW 16 #define IP6_RT_PRIO_USER 1024 #define IP6_RT_PRIO_ADDRCONF 256 #define IP6_RT_PRIO_KERN 512 -#define IP6_RT_FLOW_MASK 0x00ff struct route_info { __u8 type; From 4e881a217bd1403960eb8f32297ba9d226c6d5ae Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 7 Feb 2008 18:11:49 -0800 Subject: [PATCH 14/21] [IPV6] Minor cleanup: remove unused definitions in net/ip6_fib.h This patch removes some unused definitions and one method typedef declaration (f_pnode) in include/net/ip6_fib.h, as they are not used in the kernel. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index d8d85b13364d..953d6040ff50 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -150,19 +150,6 @@ struct rt6_statistics { * */ -#define RTPRI_FIREWALL 8 /* Firewall control information */ -#define RTPRI_FLOW 16 /* Flow based forwarding rules */ -#define RTPRI_KERN_CTL 32 /* Kernel control routes */ - -#define RTPRI_USER_MIN 256 /* Mimimum user priority */ -#define RTPRI_USER_MAX 1024 /* Maximum user priority */ - -#define RTPRI_KERN_DFLT 4096 /* Kernel default routes */ - -#define MAX_FLOW_BACKTRACE 32 - - -typedef void (*f_pnode)(struct fib6_node *fn, void *); struct fib6_table { struct hlist_node tb6_hlist; From 04f217aca4d803fe72c2c54fe460d68f5233ce52 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 7 Feb 2008 18:13:00 -0800 Subject: [PATCH 15/21] [TC]: oops in em_meta If userspace passes a unknown match index into em_meta, then em_meta_change will return an error and the data for the match will not be set. 
This then causes a null pointer dereference when the cleanup is done in the error path via tcf_em_tree_destroy. Since the tree structure comes from kzalloc, it is initialized to NULL. Discovered when testing a new version of tc command against an accidental older kernel. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/em_meta.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 2a7e648fbcf4..d417ec8e3ca3 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -735,11 +735,13 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, static inline void meta_delete(struct meta_match *meta) { - struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); + if (meta) { + struct meta_type_ops *ops = meta_type_ops(&meta->lvalue); - if (ops && ops->destroy) { - ops->destroy(&meta->lvalue); - ops->destroy(&meta->rvalue); + if (ops && ops->destroy) { + ops->destroy(&meta->lvalue); + ops->destroy(&meta->rvalue); + } } kfree(meta); From 86121fe5b4f170829429433cd99ec7f884c8ae75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 7 Feb 2008 18:17:13 -0800 Subject: [PATCH 16/21] [TIPC]: Kill unused static inline (x5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All these static inlines are unused: in_own_zone 1 (net/tipc/addr.h) msg_dataoctet 1 (net/tipc/msg.h) msg_direct 1 (include/net/tipc/tipc_msg.h) msg_options 1 (include/net/tipc/tipc_msg.h) tipc_nmap_get 1 (net/tipc/bcast.h) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- include/net/tipc/tipc_msg.h | 16 ---------------- net/tipc/addr.h | 5 ----- net/tipc/bcast.h | 13 ------------- net/tipc/msg.h | 5 ----- 4 files changed, 39 deletions(-) diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h index fb42eb7a86a5..2e159a812f83 100644 --- a/include/net/tipc/tipc_msg.h +++ b/include/net/tipc/tipc_msg.h @@ -130,11 +130,6 @@ static inline u32 msg_type(struct tipc_msg *m) return msg_bits(m, 1, 29, 0x7); } -static inline u32 msg_direct(struct tipc_msg *m) -{ - return (msg_type(m) == TIPC_DIRECT_MSG); -} - static inline u32 msg_named(struct tipc_msg *m) { return (msg_type(m) == TIPC_NAMED_MSG); @@ -207,17 +202,6 @@ static inline u32 msg_nameupper(struct tipc_msg *m) return msg_word(m, 10); } -static inline char *msg_options(struct tipc_msg *m, u32 *len) -{ - u32 pos = msg_bits(m, 1, 16, 0x7); - - if (!pos) - return 0; - pos = (pos * 4) + 28; - *len = msg_hdr_sz(m) - pos; - return (char *)&m->hdr[pos/4]; -} - #endif #endif diff --git a/net/tipc/addr.h b/net/tipc/addr.h index e4bd5335e48d..3ba67e6ce03e 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -57,11 +57,6 @@ static inline int in_own_cluster(u32 addr) return !((addr ^ tipc_own_addr) >> 12); } -static inline int in_own_zone(u32 addr) -{ - return !((addr ^ tipc_own_addr) >> 24); -} - static inline int is_slave(u32 addr) { return addr & 0x800; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index f910ed29d055..a2416fa6b906 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -73,19 +73,6 @@ struct node; extern char tipc_bclink_name[]; -/** - * nmap_get - determine if node exists in a node map - */ - -static inline int tipc_nmap_get(struct node_map *nm_ptr, u32 node) -{ - int n = tipc_node(node); - int w = n / WSIZE; - int b = n % WSIZE; - - return nm_ptr->map[w] & (1 << b); -} - /** * nmap_add - add a node to a node map */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ce2659836374..e9ef6df26562 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ 
-663,11 +663,6 @@ static inline void msg_set_remote_node(struct tipc_msg *m, u32 a) msg_set_word(m, msg_hdr_sz(m)/4, a); } -static inline int msg_dataoctet(struct tipc_msg *m, u32 pos) -{ - return(msg_data(m)[pos + 4] != 0); -} - static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos) { msg_data(m)[pos + 4] = 1; From bca65eae394e95c125837b6eb3a8246c40777608 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Feb 2008 18:18:01 -0800 Subject: [PATCH 17/21] [TIPC]: declare proto_ops structures as 'const'. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/tipc/socket.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 24ddfd2ca38b..22909036b9bc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -71,9 +71,9 @@ struct tipc_sock { static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); -static struct proto_ops packet_ops; -static struct proto_ops stream_ops; -static struct proto_ops msg_ops; +static const struct proto_ops packet_ops; +static const struct proto_ops stream_ops; +static const struct proto_ops msg_ops; static struct proto tipc_proto; @@ -1615,7 +1615,7 @@ static int getsockopt(struct socket *sock, * Protocol switches for the various types of TIPC sockets */ -static struct proto_ops msg_ops = { +static const struct proto_ops msg_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = release, @@ -1636,7 +1636,7 @@ static struct proto_ops msg_ops = { .sendpage = sock_no_sendpage }; -static struct proto_ops packet_ops = { +static const struct proto_ops packet_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = release, @@ -1657,7 +1657,7 @@ static struct proto_ops packet_ops = { .sendpage = sock_no_sendpage }; -static struct proto_ops stream_ops = { +static const struct proto_ops stream_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .release = release, @@ -1678,7 
+1678,7 @@ static struct proto_ops stream_ops = { .sendpage = sock_no_sendpage }; -static struct net_proto_family tipc_family_ops = { +static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, .create = tipc_create From 054b0e2b2d5ed460784e8dfbf30ff4768dbf4376 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 7 Feb 2008 18:20:29 -0800 Subject: [PATCH 18/21] [ISDN]: fix section mismatch warning in enpci_card_msg Fix following warnings: WARNING: drivers/isdn/hisax/built-in.o(.text+0x3cf50): Section mismatch in reference from the function enpci_card_msg() to the function .devinit.text:Amd7930_init() WARNING: drivers/isdn/hisax/built-in.o(.text+0x3cf85): Section mismatch in reference from the function enpci_card_msg() to the function .devinit.text:Amd7930_init() enpci_card_msg() can be called outside __devinit context referenced function should not be annotated __devinit. Remove annotation of Amd7930_init to fix this. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- drivers/isdn/hisax/amd7930_fn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c index c0d7036404a5..341faf58a65c 100644 --- a/drivers/isdn/hisax/amd7930_fn.c +++ b/drivers/isdn/hisax/amd7930_fn.c @@ -744,8 +744,7 @@ dbusy_timer_handler(struct IsdnCardState *cs) -void __devinit -Amd7930_init(struct IsdnCardState *cs) +void Amd7930_init(struct IsdnCardState *cs) { WORD *ptr; BYTE cmd, cnt; From 405137d16fbe4c80b9e06e61af05856027745d23 Mon Sep 17 00:00:00 2001 From: Joy Latten Date: Thu, 7 Feb 2008 23:11:56 -0800 Subject: [PATCH 19/21] [IPSEC]: Add support for aes-ctr. The below patch allows IPsec to use CTR mode with AES encryption algorithm. Tested this using setkey in ipsec-tools. Signed-off-by: Joy Latten Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/pfkeyv2.h | 1 + net/xfrm/xfrm_algo.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 6db69ff5d83e..700725ddcaae 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -298,6 +298,7 @@ struct sadb_x_sec_ctx { #define SADB_X_EALG_BLOWFISHCBC 7 #define SADB_EALG_NULL 11 #define SADB_X_EALG_AESCBC 12 +#define SADB_X_EALG_AESCTR 13 #define SADB_X_EALG_AES_CCM_ICV8 14 #define SADB_X_EALG_AES_CCM_ICV12 15 #define SADB_X_EALG_AES_CCM_ICV16 16 diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6cc15250de69..8aa6440d689f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -399,6 +399,23 @@ static struct xfrm_algo_desc ealg_list[] = { .sadb_alg_maxbits = 256 } }, +{ + .name = "rfc3686(ctr(aes))", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 160, /* 128-bit key + 32-bit nonce */ + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AESCTR, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, }; static struct xfrm_algo_desc calg_list[] = { From fca09fb732b2cc310110b2fcbf3449df043a96d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Feb 2008 23:29:57 -0800 Subject: [PATCH 20/21] [DECNET] ROUTE: remove unnecessary alignment Same alignment requirement was removed on IP route cache in the past. This alignment actually has a bad effect on 32 bit arches, uniprocessor, since sizeof(dn_rt_hash_bucket) is forced to 8 bytes instead of 4. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/decnet/dn_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 31be29b8b5a3..9dc0abb50eaf 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -94,7 +94,7 @@ struct dn_rt_hash_bucket { struct dn_route *chain; spinlock_t lock; -} __attribute__((__aligned__(8))); +}; extern struct neigh_table dn_neigh_table; From dd5a1843d566911dbb077c4022c4936697495af6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Feb 2008 23:30:42 -0800 Subject: [PATCH 21/21] [IPSEC] flow: reorder "struct flow_cache_entry" and remove SLAB_HWCACHE_ALIGN 1) We can shrink sizeof(struct flow_cache_entry) by 8 bytes on 64bit arches. 2) No need to align these structures to hardware cache lines, this only wastes RAM for very little gain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow.c b/net/core/flow.c index 9cfe84571ca5..a77531c139b7 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,8 +30,8 @@ struct flow_cache_entry { struct flow_cache_entry *next; u16 family; u8 dir; - struct flowi key; u32 genid; + struct flowi key; void *object; atomic_t *object_ref; }; @@ -346,7 +346,7 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_PANIC, NULL); flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size;