remarkable-linux/include/net/netns/conntrack.h
Jesper Dangaard Brouer 93bb0ceb75 netfilter: conntrack: remove central spinlock nf_conntrack_lock
nf_conntrack_lock is a monolithic lock and suffers from huge contention
on current generation servers (8 or more core/threads).

Perf locking congestion is clear on base kernel:

-  72.56%  ksoftirqd/6  [kernel.kallsyms]    [k] _raw_spin_lock_bh
   - _raw_spin_lock_bh
      + 25.33% init_conntrack
      + 24.86% nf_ct_delete_from_lists
      + 24.62% __nf_conntrack_confirm
      + 24.38% destroy_conntrack
      + 0.70% tcp_packet
+   2.21%  ksoftirqd/6  [kernel.kallsyms]    [k] fib_table_lookup
+   1.15%  ksoftirqd/6  [kernel.kallsyms]    [k] __slab_free
+   0.77%  ksoftirqd/6  [kernel.kallsyms]    [k] inet_getpeer
+   0.70%  ksoftirqd/6  [nf_conntrack]       [k] nf_ct_delete
+   0.55%  ksoftirqd/6  [ip_tables]          [k] ipt_do_table

This patch change conntrack locking and provides a huge performance
improvement.  SYN-flood attack tested on a 24-core E5-2695v2(ES) with
10Gbit/s ixgbe (with tool trafgen):

 Base kernel:   810.405 new conntrack/sec
 After patch: 2.233.876 new conntrack/sec

Notice other floods attack (SYN+ACK or ACK) can easily be deflected using:
 # iptables -A INPUT -m state --state INVALID -j DROP
 # sysctl -w net/netfilter/nf_conntrack_tcp_loose=0

Use an array of hashed spinlocks to protect insertions/deletions of
conntracks into the hash table. 1024 spinlocks seem to give good
results, at minimal cost (4KB memory). Due to lockdep max depth,
1024 becomes 8 if CONFIG_LOCKDEP=y

The hash resize is a bit tricky, because we need to take all locks in
the array. A seqcount_t is used to synchronize the hash table users
with the resizing process.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2014-03-07 11:41:13 +01:00

113 lines
2.6 KiB
C

#ifndef __NETNS_CONNTRACK_H
#define __NETNS_CONNTRACK_H
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/atomic.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/seqlock.h>
struct ctl_table_header;
struct nf_conntrack_ecache;
struct nf_proto_net {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_header;
struct ctl_table *ctl_table;
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
struct ctl_table_header *ctl_compat_header;
struct ctl_table *ctl_compat_table;
#endif
#endif
unsigned int users;
};
struct nf_generic_net {
struct nf_proto_net pn;
unsigned int timeout;
};
struct nf_tcp_net {
struct nf_proto_net pn;
unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
unsigned int tcp_loose;
unsigned int tcp_be_liberal;
unsigned int tcp_max_retrans;
};
enum udp_conntrack {
UDP_CT_UNREPLIED,
UDP_CT_REPLIED,
UDP_CT_MAX
};
struct nf_udp_net {
struct nf_proto_net pn;
unsigned int timeouts[UDP_CT_MAX];
};
struct nf_icmp_net {
struct nf_proto_net pn;
unsigned int timeout;
};
struct nf_ip_net {
struct nf_generic_net generic;
struct nf_tcp_net tcp;
struct nf_udp_net udp;
struct nf_icmp_net icmp;
struct nf_icmp_net icmpv6;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
struct ctl_table_header *ctl_table_header;
struct ctl_table *ctl_table;
#endif
};
struct ct_pcpu {
spinlock_t lock;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct hlist_nulls_head tmpl;
};
struct netns_ct {
atomic_t count;
unsigned int expect_count;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
struct ctl_table_header *helper_sysctl_header;
#endif
char *slabname;
unsigned int sysctl_log_invalid; /* Log invalid packets */
unsigned int sysctl_events_retry_timeout;
int sysctl_events;
int sysctl_acct;
int sysctl_auto_assign_helper;
bool auto_assign_helper_warned;
int sysctl_tstamp;
int sysctl_checksum;
unsigned int htable_size;
seqcount_t generation;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash;
struct hlist_head *expect_hash;
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;
u8 label_words;
#endif
#ifdef CONFIG_NF_NAT_NEEDED
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
#endif
};
#endif