/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 * Patrick McHardy (c) 2006-2012
 */
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv6.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "nf_internals.h"

static DEFINE_MUTEX(afinfo_mutex);

const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);

int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
	mutex_lock(&afinfo_mutex);
	RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
	mutex_unlock(&afinfo_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_register_afinfo);

void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
{
	mutex_lock(&afinfo_mutex);
	RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
	mutex_unlock(&afinfo_mutex);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);

#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

static DEFINE_MUTEX(nf_hook_mutex);

/* max hooks per family/hooknum */
#define MAX_HOOK_COUNT		1024

#define nf_entry_dereference(e) \
	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
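
/* One blob per family/hooknum: the struct nf_hook_entries header is
 * followed by num nf_hook_entry slots and then by the num nf_hook_ops
 * pointers that nf_hook_entries_get_hook_ops() hands out; the whole
 * thing is a single kvzalloc'd allocation.
 */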
static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
	struct nf_hook_entries *e;
	size_t alloc = sizeof(*e) +
		       sizeof(struct nf_hook_entry) * num +
		       sizeof(struct nf_hook_ops *) * num;

	if (num == 0)
		return NULL;

	e = kvzalloc(alloc, GFP_KERNEL);
	if (e)
		e->num_hook_entries = num;
	return e;
}

static unsigned int accept_all(void *priv,
			       struct sk_buff *skb,
			       const struct nf_hook_state *state)
{
	return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
}

static const struct nf_hook_ops dummy_ops = {
	.hook = accept_all,
	.priority = INT_MIN,
};
static struct nf_hook_entries *
nf_hook_entries_grow(const struct nf_hook_entries *old,
		     const struct nf_hook_ops *reg)
{
	unsigned int i, alloc_entries, nhooks, old_entries;
	struct nf_hook_ops **orig_ops = NULL;
	struct nf_hook_ops **new_ops;
	struct nf_hook_entries *new;
	bool inserted = false;

	alloc_entries = 1;
	old_entries = old ? old->num_hook_entries : 0;

	if (old) {
		orig_ops = nf_hook_entries_get_hook_ops(old);

		for (i = 0; i < old_entries; i++) {
			if (orig_ops[i] != &dummy_ops)
				alloc_entries++;
		}
	}

	if (alloc_entries > MAX_HOOK_COUNT)
		return ERR_PTR(-E2BIG);

	new = allocate_hook_entries_size(alloc_entries);
	if (!new)
		return ERR_PTR(-ENOMEM);

	new_ops = nf_hook_entries_get_hook_ops(new);

	i = 0;
	nhooks = 0;
	while (i < old_entries) {
		if (orig_ops[i] == &dummy_ops) {
			++i;
			continue;
		}
		if (inserted || reg->priority > orig_ops[i]->priority) {
			new_ops[nhooks] = (void *)orig_ops[i];
			new->hooks[nhooks] = old->hooks[i];
			i++;
		} else {
			new_ops[nhooks] = (void *)reg;
			new->hooks[nhooks].hook = reg->hook;
			new->hooks[nhooks].priv = reg->priv;
			inserted = true;
		}
		nhooks++;
	}

	if (!inserted) {
		new_ops[nhooks] = (void *)reg;
		new->hooks[nhooks].hook = reg->hook;
		new->hooks[nhooks].priv = reg->priv;
	}

	return new;
}
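
/* Debug-only sanity check: with CONFIG_DEBUG_KERNEL, warn if the live
 * entries of a blob are not in ascending ->priority order.
 */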
static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
	struct nf_hook_ops **orig_ops;
	int prio = INT_MIN;
	size_t i = 0;

	orig_ops = nf_hook_entries_get_hook_ops(hooks);

	for (i = 0; i < hooks->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			continue;

		WARN_ON(orig_ops[i]->priority < prio);

		if (orig_ops[i]->priority > prio)
			prio = orig_ops[i]->priority;
	}
#endif
}

/*
 * __nf_hook_entries_try_shrink - try to shrink hook array
 *
 * @pp -- location of hook blob
 *
 * Hook unregistration must always succeed, so to-be-removed hooks
 * are replaced by a dummy one that will just move to the next hook.
 *
 * This counts the current dummy hooks, attempts to allocate a new blob,
 * copies the live hooks, then replaces and discards the old one.
 *
 * Returns the address to free, or NULL.
 */
static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
{
	struct nf_hook_entries *old, *new = NULL;
	unsigned int i, j, skip = 0, hook_entries;
	struct nf_hook_ops **orig_ops;
	struct nf_hook_ops **new_ops;

	old = nf_entry_dereference(*pp);
	if (WARN_ON_ONCE(!old))
		return NULL;

	orig_ops = nf_hook_entries_get_hook_ops(old);
	for (i = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			skip++;
	}

	/* if skip == hook_entries all hooks have been removed */
	hook_entries = old->num_hook_entries;
	if (skip == hook_entries)
		goto out_assign;

	if (skip == 0)
		return NULL;

	hook_entries -= skip;
	new = allocate_hook_entries_size(hook_entries);
	if (!new)
		return NULL;

	new_ops = nf_hook_entries_get_hook_ops(new);
	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			continue;
		new->hooks[j] = old->hooks[i];
		new_ops[j] = (void *)orig_ops[i];
		j++;
	}
	hooks_validate(new);
out_assign:
	rcu_assign_pointer(*pp, new);
	return old;
}
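
/* Map @reg to the location of its blob pointer: the per-netns table
 * for the ordinary protocol families, or the per-device list for
 * NFPROTO_NETDEV/NF_NETDEV_INGRESS hooks.
 */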
static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
	if (reg->pf != NFPROTO_NETDEV)
		return net->nf.hooks[reg->pf]+reg->hooknum;

#ifdef CONFIG_NETFILTER_INGRESS
	if (reg->hooknum == NF_NETDEV_INGRESS) {
		if (reg->dev && dev_net(reg->dev) == net)
			return &reg->dev->nf_hooks_ingress;
	}
#endif
	WARN_ON_ONCE(1);
	return NULL;
}

int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
	struct nf_hook_entries *p, *new_hooks;
	struct nf_hook_entries __rcu **pp;

	if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
		if (reg->hooknum == NF_NETDEV_INGRESS)
			return -EOPNOTSUPP;
#endif
		if (reg->hooknum != NF_NETDEV_INGRESS ||
		    !reg->dev || dev_net(reg->dev) != net)
			return -EINVAL;
	}

	pp = nf_hook_entry_head(net, reg);
	if (!pp)
		return -EINVAL;

	mutex_lock(&nf_hook_mutex);

	p = nf_entry_dereference(*pp);
	new_hooks = nf_hook_entries_grow(p, reg);

	if (!IS_ERR(new_hooks))
		rcu_assign_pointer(*pp, new_hooks);

	mutex_unlock(&nf_hook_mutex);
	if (IS_ERR(new_hooks))
		return PTR_ERR(new_hooks);

	hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
		net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
	synchronize_net();
	BUG_ON(p == new_hooks);
	kvfree(p);
	return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
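
/* A minimal registration sketch (illustrative only, not part of this
 * file); the names my_hookfn and my_ops are made up for the example:
 *
 *	static unsigned int my_hookfn(void *priv, struct sk_buff *skb,
 *				      const struct nf_hook_state *state)
 *	{
 *		return NF_ACCEPT;
 *	}
 *
 *	static const struct nf_hook_ops my_ops = {
 *		.hook		= my_hookfn,
 *		.pf		= NFPROTO_IPV4,
 *		.hooknum	= NF_INET_PRE_ROUTING,
 *		.priority	= NF_IP_PRI_FILTER,
 *	};
 *
 *	err = nf_register_net_hook(&init_net, &my_ops);
 *	...
 *	nf_unregister_net_hook(&init_net, &my_ops);
 */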

/*
 * __nf_unregister_net_hook - remove a hook from blob
 *
 * @old: current hook blob
 * @unreg: hook to unregister
 *
 * This cannot fail, hook unregistration must always succeed.
 * Therefore replace the to-be-removed hook with a dummy hook.
 */
static void __nf_unregister_net_hook(struct nf_hook_entries *old,
				     const struct nf_hook_ops *unreg)
{
	struct nf_hook_ops **orig_ops;
	bool found = false;
	unsigned int i;

	orig_ops = nf_hook_entries_get_hook_ops(old);
	for (i = 0; i < old->num_hook_entries; i++) {
		if (orig_ops[i] != unreg)
			continue;
		WRITE_ONCE(old->hooks[i].hook, accept_all);
		WRITE_ONCE(orig_ops[i], &dummy_ops);
		found = true;
		break;
	}

	if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
		if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
			net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
		static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
	} else {
		WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
	}
}
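
/* Public counterpart of nf_register_net_hook(): mark the hook dead in
 * place, try to shrink the blob, wait for packet-path and nfqueue
 * users to drain, then free whatever the shrink detached.
 */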
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
	struct nf_hook_entries __rcu **pp;
	struct nf_hook_entries *p;
	unsigned int nfq;

	pp = nf_hook_entry_head(net, reg);
	if (!pp)
		return;

	mutex_lock(&nf_hook_mutex);

	p = nf_entry_dereference(*pp);
	if (WARN_ON_ONCE(!p)) {
		mutex_unlock(&nf_hook_mutex);
		return;
	}

	__nf_unregister_net_hook(p, reg);

	p = __nf_hook_entries_try_shrink(pp);
	mutex_unlock(&nf_hook_mutex);
	if (!p)
		return;

	synchronize_net();

	/* other cpu might still process nfqueue verdict that used reg */
	nfq = nf_queue_nf_hook_drop(net);
	if (nfq)
		synchronize_net();
	kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);

int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			  unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++) {
		err = nf_register_net_hook(net, &reg[i]);
		if (err)
			goto err;
	}
	return err;

err:
	if (i > 0)
		nf_unregister_net_hooks(net, reg, i);
	return err;
}
EXPORT_SYMBOL(nf_register_net_hooks);
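
/* Unregister @hookcount hooks in two passes: first mark each hook dead
 * under the mutex, then shrink and free the affected blobs in batches
 * of up to ARRAY_SIZE(to_free) entries per synchronize_net() cycle.
 */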
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			     unsigned int hookcount)
{
	struct nf_hook_entries *to_free[16], *p;
	struct nf_hook_entries __rcu **pp;
	unsigned int i, j, n;

	mutex_lock(&nf_hook_mutex);
	for (i = 0; i < hookcount; i++) {
		pp = nf_hook_entry_head(net, &reg[i]);
		if (!pp)
			continue;

		p = nf_entry_dereference(*pp);
		if (WARN_ON_ONCE(!p))
			continue;
		__nf_unregister_net_hook(p, &reg[i]);
	}
	mutex_unlock(&nf_hook_mutex);

	do {
		n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));

		mutex_lock(&nf_hook_mutex);

		for (i = 0, j = 0; i < hookcount && j < n; i++) {
			pp = nf_hook_entry_head(net, &reg[i]);
			if (!pp)
				continue;

			p = nf_entry_dereference(*pp);
			if (!p)
				continue;

			to_free[j] = __nf_hook_entries_try_shrink(pp);
			if (to_free[j])
				++j;
		}

		mutex_unlock(&nf_hook_mutex);

		if (j) {
			unsigned int nfq;

			synchronize_net();

			/* need 2nd synchronize_net() if nfqueue is used, skb
			 * can get reinjected right before nf_queue_hook_drop()
			 */
			nfq = nf_queue_nf_hook_drop(net);
			if (nfq)
				synchronize_net();

			for (i = 0; i < j; i++)
				kvfree(to_free[i]);
		}

		reg += n;
		hookcount -= n;
	} while (hookcount > 0);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);

/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
		 const struct nf_hook_entries *e, unsigned int s)
{
	unsigned int verdict;
	int ret;

	for (; s < e->num_hook_entries; s++) {
		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
		switch (verdict & NF_VERDICT_MASK) {
		case NF_ACCEPT:
			break;
		case NF_DROP:
			kfree_skb(skb);
			ret = NF_DROP_GETERR(verdict);
			if (ret == 0)
				ret = -EPERM;
			return ret;
		case NF_QUEUE:
			ret = nf_queue(skb, state, e, s, verdict);
			if (ret == 1)
				continue;
			return ret;
		default:
			/* Implicit handling for NF_STOLEN, as well as any other
			 * non conventional verdicts.
			 */
			return 0;
		}
	}

	return 1;
}
EXPORT_SYMBOL(nf_hook_slow);
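
/* Ensure the first @writable_len bytes of @skb can be modified in
 * place: a non-cloned skb with enough linear data already qualifies,
 * otherwise the missing bytes are pulled into the head (which also
 * unshares a cloned skb).  Returns 1 on success, 0 on failure.
 */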
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
	if (writable_len > skb->len)
		return 0;

	/* Not exclusive use of packet?  Must copy. */
	if (!skb_cloned(skb)) {
		if (writable_len <= skb_headlen(skb))
			return 1;
	} else if (skb_clone_writable(skb, writable_len))
		return 1;

	if (writable_len <= skb_headlen(skb))
		writable_len = 0;
	else
		writable_len -= skb_headlen(skb);

	return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);

/* This needs to be compiled in any case to avoid dependencies between the
 * nfnetlink_queue code and nf_conntrack.
 */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if
 * connection tracking is in use: without this, a connection may not be
 * in the hash table, and hence manufactured ICMP or RST packets will
 * not be associated with it.
 */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
		__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
	void (*attach)(struct sk_buff *, const struct sk_buff *);

	if (skb->_nfct) {
		rcu_read_lock();
		attach = rcu_dereference(ip_ct_attach);
		if (attach)
			attach(new, skb);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(nf_ct_attach);
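
/* Same RCU function-pointer pattern for conntrack destruction: the
 * pointer below is published by the conntrack module and sampled under
 * rcu_read_lock() by nf_conntrack_destroy().
 */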
void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);

void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
	void (*destroy)(struct nf_conntrack *);

	rcu_read_lock();
	destroy = rcu_dereference(nf_ct_destroy);
	BUG_ON(destroy == NULL);
	destroy(nfct);
	rcu_read_unlock();
}
EXPORT_SYMBOL(nf_conntrack_destroy);

/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
	.id	= NF_CT_DEFAULT_ZONE_ID,
	.dir	= NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */

#ifdef CONFIG_NF_NAT_NEEDED
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
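
/* Per-netns setup: start every family/hooknum slot out empty and, when
 * CONFIG_PROC_FS is set, create the "netfilter" directory under
 * /proc/net.
 */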
static int __net_init netfilter_net_init(struct net *net)
{
	int i, h;

	for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
		for (h = 0; h < NF_MAX_HOOKS; h++)
			RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
	}

#ifdef CONFIG_PROC_FS
	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
						net->proc_net);
	if (!net->nf.proc_netfilter) {
		if (!net_eq(net, &init_net))
			pr_err("cannot create netfilter proc entry");

		return -ENOMEM;
	}
#endif

	return 0;
}

static void __net_exit netfilter_net_exit(struct net *net)
{
	remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations netfilter_net_ops = {
	.init = netfilter_net_init,
	.exit = netfilter_net_exit,
};

int __init netfilter_init(void)
{
	int ret;

	ret = register_pernet_subsys(&netfilter_net_ops);
	if (ret < 0)
		goto err;

	ret = netfilter_log_init();
	if (ret < 0)
		goto err_pernet;

	return 0;

err_pernet:
	unregister_pernet_subsys(&netfilter_net_ops);
err:
	return ret;
}