remarkable-linux/net/netfilter/nf_conntrack_extend.c
Liping Zhang 9c3f379492 netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister
If one cpu is doing nf_ct_extend_unregister while another cpu is doing
__nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover,
there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to
NULL, so it's possible that we may access invalid pointer.

But actually, most of the ct extends are built-in, so the problem listed
above will not happen. However, there are two exceptions: NF_CT_EXT_NAT
and NF_CT_EXT_SYNPROXY.

For _EXT_NAT, the panic will not happen, since adding the nat extend and
unregistering the nat extend are located in the same file(nf_nat_core.c),
this means that after the nat module is removed, we cannot add the nat
extend too.

For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while
synproxy extend unregister will be done by synproxy_core_exit. So after
nf_synproxy_core.ko is removed, we may still try to add the synproxy
extend, then kernel panic may happen.

I know it's very hard to reproduce this issue, but I can play a tricky
game to make it happen very easily :)

Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook:
  # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY
Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook.
        Also note, in the userspace we only add a 20s' delay, then
        reinject the syn packet to the kernel:
  # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1
Step 3. Using "nc 2.2.2.2 1234" to connect the server.
Step 4. Now remove the nf_synproxy_core.ko quickly:
  # iptables -F FORWARD
  # rmmod ipt_SYNPROXY
  # rmmod nf_synproxy_core
Step 5. After 20s' delay, the syn packet is reinjected to the kernel.

Now you will see the panic like this:
  kernel BUG at net/netfilter/nf_conntrack_extend.c:91!
  Call Trace:
   ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack]
   init_conntrack+0x12b/0x600 [nf_conntrack]
   nf_conntrack_in+0x4cc/0x580 [nf_conntrack]
   ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4]
   nf_reinject+0x104/0x270
   nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue]
   ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue]
   ? nla_parse+0xa0/0x100
   nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink]
   [...]

One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e.
introduce nf_conntrack_synproxy.c and only do ct extend register and
unregister in it, similar to nf_conntrack_timeout.c.

But having such a obscure restriction of nf_ct_extend_unregister is not a
good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types
to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then
it will be easier if we add new ct extend in the future.

Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary
anymore, remove it too.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2017-03-27 13:47:29 +02:00

188 lines
4.7 KiB
C

/* Structure dynamic extension infrastructure
* Copyright (C) 2004 Rusty Russell IBM Corporation
* Copyright (C) 2007 Netfilter Core Team <coreteam@netfilter.org>
* Copyright (C) 2007 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack_extend.h>
static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
static DEFINE_MUTEX(nf_ct_ext_type_mutex);
void __nf_ct_ext_destroy(struct nf_conn *ct)
{
unsigned int i;
struct nf_ct_ext_type *t;
struct nf_ct_ext *ext = ct->ext;
for (i = 0; i < NF_CT_EXT_NUM; i++) {
if (!__nf_ct_ext_exist(ext, i))
continue;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
/* Here the nf_ct_ext_type might have been unregisterd.
* I.e., it has responsible to cleanup private
* area in all conntracks when it is unregisterd.
*/
if (t && t->destroy)
t->destroy(ct);
rcu_read_unlock();
}
}
EXPORT_SYMBOL(__nf_ct_ext_destroy);
static void *
nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
size_t var_alloc_len, gfp_t gfp)
{
unsigned int off, len;
struct nf_ct_ext_type *t;
size_t alloc_size;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
if (!t) {
rcu_read_unlock();
return NULL;
}
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
len = off + t->len + var_alloc_len;
alloc_size = t->alloc_size + var_alloc_len;
rcu_read_unlock();
*ext = kzalloc(alloc_size, gfp);
if (!*ext)
return NULL;
(*ext)->offset[id] = off;
(*ext)->len = len;
return (void *)(*ext) + off;
}
void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
size_t var_alloc_len, gfp_t gfp)
{
struct nf_ct_ext *old, *new;
int newlen, newoff;
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
old = ct->ext;
if (!old)
return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp);
if (__nf_ct_ext_exist(old, id))
return NULL;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
if (!t) {
rcu_read_unlock();
return NULL;
}
newoff = ALIGN(old->len, t->align);
newlen = newoff + t->len + var_alloc_len;
rcu_read_unlock();
new = __krealloc(old, newlen, gfp);
if (!new)
return NULL;
if (new != old) {
kfree_rcu(old, rcu);
rcu_assign_pointer(ct->ext, new);
}
new->offset[id] = newoff;
new->len = newlen;
memset((void *)new + newoff, 0, newlen - newoff);
return (void *)new + newoff;
}
EXPORT_SYMBOL(__nf_ct_ext_add_length);
static void update_alloc_size(struct nf_ct_ext_type *type)
{
int i, j;
struct nf_ct_ext_type *t1, *t2;
enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1;
/* unnecessary to update all types */
if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) {
min = type->id;
max = type->id;
}
/* This assumes that extended areas in conntrack for the types
whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
for (i = min; i <= max; i++) {
t1 = rcu_dereference_protected(nf_ct_ext_types[i],
lockdep_is_held(&nf_ct_ext_type_mutex));
if (!t1)
continue;
t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
t1->len;
for (j = 0; j < NF_CT_EXT_NUM; j++) {
t2 = rcu_dereference_protected(nf_ct_ext_types[j],
lockdep_is_held(&nf_ct_ext_type_mutex));
if (t2 == NULL || t2 == t1 ||
(t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
continue;
t1->alloc_size = ALIGN(t1->alloc_size, t2->align)
+ t2->len;
}
}
}
/* This MUST be called in process context. */
int nf_ct_extend_register(struct nf_ct_ext_type *type)
{
int ret = 0;
mutex_lock(&nf_ct_ext_type_mutex);
if (nf_ct_ext_types[type->id]) {
ret = -EBUSY;
goto out;
}
/* This ensures that nf_ct_ext_create() can allocate enough area
before updating alloc_size */
type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
+ type->len;
rcu_assign_pointer(nf_ct_ext_types[type->id], type);
update_alloc_size(type);
out:
mutex_unlock(&nf_ct_ext_type_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_extend_register);
/* This MUST be called in process context. */
void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
{
mutex_lock(&nf_ct_ext_type_mutex);
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);