2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Packet matching code.
|
|
|
|
*
|
|
|
|
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
|
2013-04-06 07:24:29 -06:00
|
|
|
* Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
|
2005-04-16 16:20:36 -06:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
2009-07-09 14:54:53 -06:00
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/cache.h>
|
2006-01-11 13:17:47 -07:00
|
|
|
#include <linux/capability.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/kmod.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/icmp.h>
|
|
|
|
#include <net/ip.h>
|
2006-04-01 03:25:19 -07:00
|
|
|
#include <net/compat.h>
|
2016-12-24 12:46:01 -07:00
|
|
|
#include <linux/uaccess.h>
|
2006-03-20 23:35:41 -07:00
|
|
|
#include <linux/mutex.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/proc_fs.h>
|
|
|
|
#include <linux/err.h>
|
2005-10-13 15:41:23 -06:00
|
|
|
#include <linux/cpumask.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
#include <linux/netfilter/x_tables.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/netfilter_ipv4/ip_tables.h>
|
2007-12-17 23:38:49 -07:00
|
|
|
#include <net/netfilter/nf_log.h>
|
2009-06-17 14:14:54 -06:00
|
|
|
#include "../../netfilter/xt_repldata.h"
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Module identification for the IPv4 packet-filter core. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
|
|
|
|
|
2009-06-17 14:14:54 -06:00
|
|
|
/* Allocate and fill the initial replacement blob for a built-in table.
 * xt_alloc_initial_table() (from xt_repldata.h) is a macro taking the
 * ipt/IPT tokens to select the IPv4 variants of the repldata helpers.
 * Ownership of the returned blob transfers to the caller.
 */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
	return xt_alloc_initial_table(ipt, IPT);
}
EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Returns whether matches rule or not. */
|
2008-01-15 00:44:05 -07:00
|
|
|
/* Performance critical - called for every packet */
|
2007-12-17 22:52:00 -07:00
|
|
|
static inline bool
|
2005-04-16 16:20:36 -06:00
|
|
|
ip_packet_match(const struct iphdr *ip,
|
|
|
|
const char *indev,
|
|
|
|
const char *outdev,
|
|
|
|
const struct ipt_ip *ipinfo,
|
|
|
|
int isfrag)
|
|
|
|
{
|
|
|
|
unsigned long ret;
|
|
|
|
|
2016-06-24 14:25:22 -06:00
|
|
|
if (NF_INVF(ipinfo, IPT_INV_SRCIP,
|
|
|
|
(ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
|
|
|
|
NF_INVF(ipinfo, IPT_INV_DSTIP,
|
|
|
|
(ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
|
2007-12-17 22:52:00 -07:00
|
|
|
return false;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-03-25 10:31:52 -06:00
|
|
|
ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-06-24 14:25:22 -06:00
|
|
|
if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
|
2007-12-17 22:52:00 -07:00
|
|
|
return false;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-03-25 10:31:52 -06:00
|
|
|
ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-06-24 14:25:22 -06:00
|
|
|
if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
|
2007-12-17 22:52:00 -07:00
|
|
|
return false;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Check specific protocol */
|
2009-11-23 15:17:06 -07:00
|
|
|
if (ipinfo->proto &&
|
2016-06-24 14:25:22 -06:00
|
|
|
NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
|
2007-12-17 22:52:00 -07:00
|
|
|
return false;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* If we have a fragment rule but the packet is not a fragment
|
|
|
|
* then we return zero */
|
2016-06-24 14:25:22 -06:00
|
|
|
if (NF_INVF(ipinfo, IPT_INV_FRAG,
|
|
|
|
(ipinfo->flags & IPT_F_FRAG) && !isfrag))
|
2007-12-17 22:52:00 -07:00
|
|
|
return false;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-12-17 22:52:00 -07:00
|
|
|
return true;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static bool
|
2005-04-16 16:20:36 -06:00
|
|
|
ip_checkentry(const struct ipt_ip *ip)
|
|
|
|
{
|
2016-05-03 05:54:23 -06:00
|
|
|
if (ip->flags & ~IPT_F_MASK)
|
2007-07-07 23:16:00 -06:00
|
|
|
return false;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (ip->invflags & ~IPT_INV_MASK)
|
2007-07-07 23:16:00 -06:00
|
|
|
return false;
|
|
|
|
return true;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned int
|
2009-07-05 11:43:26 -06:00
|
|
|
ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2012-05-13 15:56:26 -06:00
|
|
|
net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
return NF_DROP;
|
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
/* Performance critical */
/* Resolve a rule entry from its byte offset inside the table blob. */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	const void *addr = base + offset;

	return (struct ipt_entry *)addr;
}
|
|
|
|
|
2007-07-07 23:21:23 -06:00
|
|
|
/* All zeroes == unconditional rule. */
|
2008-01-15 00:44:05 -07:00
|
|
|
/* Mildly perf critical (only if packet tracing is on) */
|
2016-03-22 11:02:52 -06:00
|
|
|
static inline bool unconditional(const struct ipt_entry *e)
|
2007-07-07 23:21:23 -06:00
|
|
|
{
|
2009-07-09 15:00:19 -06:00
|
|
|
static const struct ipt_ip uncond;
|
2007-07-07 23:21:23 -06:00
|
|
|
|
2016-03-22 11:02:52 -06:00
|
|
|
return e->target_offset == sizeof(struct ipt_entry) &&
|
|
|
|
memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
|
2007-07-07 23:21:23 -06:00
|
|
|
}
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
/* for const-correctness */
static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	/* ipt_get_target() takes a non-const entry; casting away const is
	 * safe because the result is returned as pointer-to-const again. */
	return ipt_get_target((struct ipt_entry *)e);
}
|
|
|
|
|
2013-03-21 13:48:42 -06:00
|
|
|
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* Hook number -> chain name, used in TRACE log lines. */
static const char *const hooknames[] = {
	[NF_INET_PRE_ROUTING]		= "PREROUTING",
	[NF_INET_LOCAL_IN]		= "INPUT",
	[NF_INET_FORWARD]		= "FORWARD",
	[NF_INET_LOCAL_OUT]		= "OUTPUT",
	[NF_INET_POST_ROUTING]		= "POSTROUTING",
};

/* Indices into comments[] below. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE,
	NF_IP_TRACE_COMMENT_RETURN,
	NF_IP_TRACE_COMMENT_POLICY,
};

/* Comment field of a TRACE log line: ordinary rule, chain return,
 * or built-in chain policy. */
static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
};

/* Log parameters shared by all TRACE output. */
static const struct nf_loginfo trace_loginfo = {
	.type = NF_LOG_TYPE_LOG,
	.u = {
		.log = {
			.level = 4,
			.logflags = NF_LOG_DEFAULT_MASK,
		},
	},
};
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
/* Mildly perf critical (only if packet tracing is on) */
/* Called for each entry 's' while walking a hook's rules in order to
 * locate entry 'e'.  Tracks the current chain name (reset at each
 * ERROR-target chain head) and the rule number within that chain, and
 * picks the proper comment ("rule"/"return"/"policy") when 'e' is hit.
 * Returns 1 once 'e' has been found so the caller can stop iterating,
 * 0 otherwise.
 */
static inline int
get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
		      const char *hookname, const char **chainname,
		      const char **comment, unsigned int *rulenum)
{
	const struct xt_standard_target *t = (void *)ipt_get_target_c(s);

	if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
		/* Head of user chain: ERROR target with chainname */
		*chainname = t->target.data;
		(*rulenum) = 0;
	} else if (s == e) {
		(*rulenum)++;

		if (unconditional(s) &&
		    strcmp(t->target.u.kernel.target->name,
			   XT_STANDARD_TARGET) == 0 &&
		    t->verdict < 0) {
			/* Tail of chains: STANDARD target (return/policy) */
			/* NOTE: pointer comparison — *chainname still equals
			 * hookname only if no user chain head was seen. */
			*comment = *chainname == hookname
				? comments[NF_IP_TRACE_COMMENT_POLICY]
				: comments[NF_IP_TRACE_COMMENT_RETURN];
		}
		return 1;
	} else
		(*rulenum)++;

	return 0;
}
|
|
|
|
|
2015-09-15 19:04:17 -06:00
|
|
|
static void trace_packet(struct net *net,
|
|
|
|
const struct sk_buff *skb,
|
2007-07-07 23:21:23 -06:00
|
|
|
unsigned int hook,
|
|
|
|
const struct net_device *in,
|
|
|
|
const struct net_device *out,
|
2008-01-31 04:54:47 -07:00
|
|
|
const char *tablename,
|
2009-06-25 23:51:59 -06:00
|
|
|
const struct xt_table_info *private,
|
|
|
|
const struct ipt_entry *e)
|
2007-07-07 23:21:23 -06:00
|
|
|
{
|
2008-04-14 03:15:35 -06:00
|
|
|
const struct ipt_entry *root;
|
2009-04-15 12:31:13 -06:00
|
|
|
const char *hookname, *chainname, *comment;
|
2010-02-24 10:32:59 -07:00
|
|
|
const struct ipt_entry *iter;
|
2007-07-07 23:21:23 -06:00
|
|
|
unsigned int rulenum = 0;
|
|
|
|
|
2015-06-10 17:34:55 -06:00
|
|
|
root = get_entry(private->entries, private->hook_entry[hook]);
|
2007-07-07 23:21:23 -06:00
|
|
|
|
2009-04-15 12:31:13 -06:00
|
|
|
hookname = chainname = hooknames[hook];
|
|
|
|
comment = comments[NF_IP_TRACE_COMMENT_RULE];
|
2007-07-07 23:21:23 -06:00
|
|
|
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
|
|
|
|
if (get_chainname_rulenum(iter, e, hookname,
|
|
|
|
&chainname, &comment, &rulenum) != 0)
|
|
|
|
break;
|
2007-07-07 23:21:23 -06:00
|
|
|
|
2015-03-01 17:10:28 -07:00
|
|
|
nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
|
|
|
|
"TRACE: %s:%s:%s:%u ",
|
|
|
|
tablename, chainname, comment, rulenum);
|
2007-07-07 23:21:23 -06:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-07-14 09:51:10 -06:00
|
|
|
static inline
|
2009-04-15 13:06:05 -06:00
|
|
|
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
|
|
|
|
{
|
|
|
|
return (void *)entry + entry->next_offset;
|
|
|
|
}
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
|
|
|
|
unsigned int
|
2007-10-15 01:53:15 -06:00
|
|
|
ipt_do_table(struct sk_buff *skb,
|
2015-04-03 18:56:08 -06:00
|
|
|
const struct nf_hook_state *state,
|
2007-02-07 16:12:33 -07:00
|
|
|
struct xt_table *table)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2015-09-18 13:32:55 -06:00
|
|
|
unsigned int hook = state->hook;
|
2005-04-16 16:20:36 -06:00
|
|
|
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
|
2008-04-14 03:15:35 -06:00
|
|
|
const struct iphdr *ip;
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Initializing verdict to NF_DROP keeps gcc happy. */
|
|
|
|
unsigned int verdict = NF_DROP;
|
|
|
|
const char *indev, *outdev;
|
2009-06-25 23:51:59 -06:00
|
|
|
const void *table_base;
|
2010-04-19 08:05:10 -06:00
|
|
|
struct ipt_entry *e, **jumpstack;
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
unsigned int stackidx, cpu;
|
2009-06-25 23:51:59 -06:00
|
|
|
const struct xt_table_info *private;
|
2009-07-05 10:26:37 -06:00
|
|
|
struct xt_action_param acpar;
|
2011-04-04 09:04:03 -06:00
|
|
|
unsigned int addend;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Initialization */
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
stackidx = 0;
|
2007-10-15 01:53:15 -06:00
|
|
|
ip = ip_hdr(skb);
|
2015-04-03 18:56:08 -06:00
|
|
|
indev = state->in ? state->in->name : nulldevname;
|
|
|
|
outdev = state->out ? state->out->name : nulldevname;
|
2005-04-16 16:20:36 -06:00
|
|
|
/* We handle fragments by dealing with the first fragment as
|
|
|
|
* if it was a normal packet. All other fragments are treated
|
|
|
|
* normally, except that they will NEVER match rules that ask
|
|
|
|
* things we don't know, ie. tcp syn flag or ports). If the
|
|
|
|
* rule is also a fragment-specific rule, non-fragments won't
|
|
|
|
* match it. */
|
2009-07-05 10:26:37 -06:00
|
|
|
acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
|
|
|
|
acpar.thoff = ip_hdrlen(skb);
|
2009-07-07 12:54:30 -06:00
|
|
|
acpar.hotdrop = false;
|
2016-11-03 03:56:21 -06:00
|
|
|
acpar.state = state;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2017-08-30 02:07:12 -06:00
|
|
|
WARN_ON(!(table->valid_hooks & (1 << hook)));
|
2011-04-04 09:04:03 -06:00
|
|
|
local_bh_disable();
|
|
|
|
addend = xt_write_recseq_begin();
|
2009-04-28 23:36:33 -06:00
|
|
|
private = table->private;
|
2010-04-19 08:05:10 -06:00
|
|
|
cpu = smp_processor_id();
|
netfilter: x_tables: fix ordering of jumpstack allocation and table update
During kernel stability testing on an SMP ARMv7 system, Yalin Wang
reported the following panic from the netfilter code:
1fe0: 0000001c 5e2d3b10 4007e779 4009e110 60000010 00000032 ff565656 ff545454
[<c06c48dc>] (ipt_do_table+0x448/0x584) from [<c0655ef0>] (nf_iterate+0x48/0x7c)
[<c0655ef0>] (nf_iterate+0x48/0x7c) from [<c0655f7c>] (nf_hook_slow+0x58/0x104)
[<c0655f7c>] (nf_hook_slow+0x58/0x104) from [<c0683bbc>] (ip_local_deliver+0x88/0xa8)
[<c0683bbc>] (ip_local_deliver+0x88/0xa8) from [<c0683718>] (ip_rcv_finish+0x418/0x43c)
[<c0683718>] (ip_rcv_finish+0x418/0x43c) from [<c062b1c4>] (__netif_receive_skb+0x4cc/0x598)
[<c062b1c4>] (__netif_receive_skb+0x4cc/0x598) from [<c062b314>] (process_backlog+0x84/0x158)
[<c062b314>] (process_backlog+0x84/0x158) from [<c062de84>] (net_rx_action+0x70/0x1dc)
[<c062de84>] (net_rx_action+0x70/0x1dc) from [<c0088230>] (__do_softirq+0x11c/0x27c)
[<c0088230>] (__do_softirq+0x11c/0x27c) from [<c008857c>] (do_softirq+0x44/0x50)
[<c008857c>] (do_softirq+0x44/0x50) from [<c0088614>] (local_bh_enable_ip+0x8c/0xd0)
[<c0088614>] (local_bh_enable_ip+0x8c/0xd0) from [<c06b0330>] (inet_stream_connect+0x164/0x298)
[<c06b0330>] (inet_stream_connect+0x164/0x298) from [<c061d68c>] (sys_connect+0x88/0xc8)
[<c061d68c>] (sys_connect+0x88/0xc8) from [<c000e340>] (ret_fast_syscall+0x0/0x30)
Code: 2a000021 e59d2028 e59de01c e59f011c (e7824103)
---[ end trace da227214a82491bd ]---
Kernel panic - not syncing: Fatal exception in interrupt
This comes about because CPU1 is executing xt_replace_table in response
to a setsockopt syscall, resulting in:
ret = xt_jumpstack_alloc(newinfo);
--> newinfo->jumpstack = kzalloc(size, GFP_KERNEL);
[...]
table->private = newinfo;
newinfo->initial_entries = private->initial_entries;
Meanwhile, CPU0 is handling the network receive path and ends up in
ipt_do_table, resulting in:
private = table->private;
[...]
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
On weakly ordered memory architectures, the writes to table->private
and newinfo->jumpstack from CPU1 can be observed out of order by CPU0.
Furthermore, on architectures which don't respect ordering of address
dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered.
This patch adds an smp_wmb() before the assignment to table->private
(which is essentially publishing newinfo) to ensure that all writes to
newinfo will be observed before plugging it into the table structure.
A dependent-read barrier is also added on the consumer sides, to ensure
the same ordering requirements are also respected there.
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reported-by: Wang, Yalin <Yalin.Wang@sonymobile.com>
Tested-by: Wang, Yalin <Yalin.Wang@sonymobile.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2013-10-21 06:14:53 -06:00
|
|
|
/*
|
|
|
|
* Ensure we load private-> members after we've fetched the base
|
|
|
|
* pointer.
|
|
|
|
*/
|
|
|
|
smp_read_barrier_depends();
|
2015-06-10 17:34:55 -06:00
|
|
|
table_base = private->entries;
|
2010-04-19 08:05:10 -06:00
|
|
|
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
|
|
|
|
/* Switch to alternate jumpstack if we're being invoked via TEE.
|
|
|
|
* TEE issues XT_CONTINUE verdict on original skb so we must not
|
|
|
|
* clobber the jumpstack.
|
|
|
|
*
|
|
|
|
* For recursion via REJECT or SYNPROXY the stack will be clobbered
|
|
|
|
* but it is no problem since absolute verdict is issued by these.
|
|
|
|
*/
|
2015-07-14 09:51:09 -06:00
|
|
|
if (static_key_false(&xt_tee_enabled))
|
|
|
|
jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
|
2009-02-20 02:35:32 -07:00
|
|
|
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
e = get_entry(table_base, private->hook_entry[hook]);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
do {
|
2010-10-13 08:11:22 -06:00
|
|
|
const struct xt_entry_target *t;
|
2010-02-24 10:34:48 -07:00
|
|
|
const struct xt_entry_match *ematch;
|
2015-06-10 17:34:54 -06:00
|
|
|
struct xt_counters *counter;
|
2009-04-15 13:28:39 -06:00
|
|
|
|
2017-08-30 02:07:12 -06:00
|
|
|
WARN_ON(!e);
|
2009-04-15 13:28:39 -06:00
|
|
|
if (!ip_packet_match(ip, indev, outdev,
|
2009-07-05 10:26:37 -06:00
|
|
|
&e->ip, acpar.fragoff)) {
|
2010-02-24 10:34:48 -07:00
|
|
|
no_match:
|
2009-04-15 13:28:39 -06:00
|
|
|
e = ipt_next_entry(e);
|
|
|
|
continue;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-07-09 11:14:18 -06:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
2009-07-05 10:26:37 -06:00
|
|
|
acpar.match = ematch->u.kernel.match;
|
|
|
|
acpar.matchinfo = ematch->data;
|
|
|
|
if (!acpar.match->match(skb, &acpar))
|
2010-02-24 10:34:48 -07:00
|
|
|
goto no_match;
|
2009-07-09 11:14:18 -06:00
|
|
|
}
|
2010-02-24 10:34:48 -07:00
|
|
|
|
2015-06-10 17:34:54 -06:00
|
|
|
counter = xt_get_this_cpu_counter(&e->counters);
|
|
|
|
ADD_COUNTER(*counter, skb->len, 1);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-04-15 13:28:39 -06:00
|
|
|
t = ipt_get_target(e);
|
2017-08-30 02:07:12 -06:00
|
|
|
WARN_ON(!t->u.kernel.target);
|
2007-07-07 23:21:23 -06:00
|
|
|
|
2013-03-21 13:48:42 -06:00
|
|
|
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
|
2009-04-15 13:28:39 -06:00
|
|
|
/* The packet is traced: log it */
|
|
|
|
if (unlikely(skb->nf_trace))
|
2015-09-15 19:04:17 -06:00
|
|
|
trace_packet(state->net, skb, hook, state->in,
|
|
|
|
state->out, table->name, private, e);
|
2007-07-07 23:21:23 -06:00
|
|
|
#endif
|
2009-04-15 13:28:39 -06:00
|
|
|
/* Standard target? */
|
|
|
|
if (!t->u.kernel.target->target) {
|
|
|
|
int v;
|
|
|
|
|
2010-10-13 08:11:22 -06:00
|
|
|
v = ((struct xt_standard_target *)t)->verdict;
|
2009-04-15 13:28:39 -06:00
|
|
|
if (v < 0) {
|
|
|
|
/* Pop from stack? */
|
2010-10-13 08:28:00 -06:00
|
|
|
if (v != XT_RETURN) {
|
2012-04-14 23:58:06 -06:00
|
|
|
verdict = (unsigned int)(-v) - 1;
|
2009-04-15 13:28:39 -06:00
|
|
|
break;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
if (stackidx == 0) {
|
2010-04-19 08:05:10 -06:00
|
|
|
e = get_entry(table_base,
|
|
|
|
private->underflow[hook]);
|
|
|
|
} else {
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
e = jumpstack[--stackidx];
|
2010-04-19 08:05:10 -06:00
|
|
|
e = ipt_next_entry(e);
|
|
|
|
}
|
2009-04-15 13:28:39 -06:00
|
|
|
continue;
|
|
|
|
}
|
2009-11-23 15:17:06 -07:00
|
|
|
if (table_base + v != ipt_next_entry(e) &&
|
2016-05-03 05:54:23 -06:00
|
|
|
!(e->ip.flags & IPT_F_GOTO))
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
jumpstack[stackidx++] = e;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-04-15 13:28:39 -06:00
|
|
|
e = get_entry(table_base, v);
|
2009-04-15 13:35:33 -06:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2009-07-05 10:26:37 -06:00
|
|
|
acpar.target = t->u.kernel.target;
|
|
|
|
acpar.targinfo = t->data;
|
2009-04-15 13:40:13 -06:00
|
|
|
|
2009-07-05 10:26:37 -06:00
|
|
|
verdict = t->u.kernel.target->target(skb, &acpar);
|
2017-07-26 20:22:04 -06:00
|
|
|
if (verdict == XT_CONTINUE) {
|
|
|
|
/* Target might have changed stuff. */
|
|
|
|
ip = ip_hdr(skb);
|
2009-04-15 13:35:33 -06:00
|
|
|
e = ipt_next_entry(e);
|
2017-07-26 20:22:04 -06:00
|
|
|
} else {
|
2009-04-15 13:35:33 -06:00
|
|
|
/* Verdict */
|
|
|
|
break;
|
2017-07-26 20:22:04 -06:00
|
|
|
}
|
2009-07-07 12:54:30 -06:00
|
|
|
} while (!acpar.hotdrop);
|
netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables.
For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but it's not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since it's not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).
So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.
This patch changes ip(6)_do_table to always use the jump stack starting
from 0.
When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.
Since there is no TEE support for arptables, it doesn't need to
test if tee is active.
The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.
A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.
Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2015-07-14 09:51:08 -06:00
|
|
|
|
2015-10-14 16:17:07 -06:00
|
|
|
xt_write_recseq_end(addend);
|
|
|
|
local_bh_enable();
|
2011-04-04 09:04:03 -06:00
|
|
|
|
2009-07-07 12:54:30 -06:00
|
|
|
if (acpar.hotdrop)
|
2005-04-16 16:20:36 -06:00
|
|
|
return NF_DROP;
|
|
|
|
else return verdict;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Figures out from what hook each rule can be called: returns 0 if
|
2015-08-26 15:20:51 -06:00
|
|
|
there are loops. Puts hook bitmask in comefrom. */
|
2005-04-16 16:20:36 -06:00
|
|
|
static int
|
2015-08-26 15:20:51 -06:00
|
|
|
mark_source_chains(const struct xt_table_info *newinfo,
|
2016-07-14 09:51:26 -06:00
|
|
|
unsigned int valid_hooks, void *entry0,
|
|
|
|
unsigned int *offsets)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
|
|
|
unsigned int hook;
|
|
|
|
|
|
|
|
/* No recursion; use packet counter to save back ptrs (reset
|
|
|
|
to 0 as we leave), and comefrom to save source hook bitmask */
|
2007-11-19 19:53:30 -07:00
|
|
|
for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
|
2005-04-16 16:20:36 -06:00
|
|
|
unsigned int pos = newinfo->hook_entry[hook];
|
2017-03-28 13:05:16 -06:00
|
|
|
struct ipt_entry *e = entry0 + pos;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
if (!(valid_hooks & (1 << hook)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Set initial back pointer. */
|
|
|
|
e->counters.pcnt = pos;
|
|
|
|
|
|
|
|
for (;;) {
|
2010-10-13 08:11:22 -06:00
|
|
|
const struct xt_standard_target *t
|
2009-06-25 23:51:59 -06:00
|
|
|
= (void *)ipt_get_target_c(e);
|
2006-12-12 01:29:52 -07:00
|
|
|
int visited = e->comefrom & (1 << hook);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-05-03 05:54:23 -06:00
|
|
|
if (e->comefrom & (1 << NF_INET_NUMHOOKS))
|
2005-04-16 16:20:36 -06:00
|
|
|
return 0;
|
2016-05-03 05:54:23 -06:00
|
|
|
|
2007-12-17 22:52:00 -07:00
|
|
|
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Unconditional return/END. */
|
2016-03-22 11:02:52 -06:00
|
|
|
if ((unconditional(e) &&
|
2009-11-23 15:17:06 -07:00
|
|
|
(strcmp(t->target.u.user.name,
|
2010-10-13 08:28:00 -06:00
|
|
|
XT_STANDARD_TARGET) == 0) &&
|
2016-03-22 11:02:52 -06:00
|
|
|
t->verdict < 0) || visited) {
|
2005-04-16 16:20:36 -06:00
|
|
|
unsigned int oldpos, size;
|
|
|
|
|
2009-03-25 12:26:35 -06:00
|
|
|
if ((strcmp(t->target.u.user.name,
|
2015-10-14 16:17:07 -06:00
|
|
|
XT_STANDARD_TARGET) == 0) &&
|
2016-05-03 05:54:23 -06:00
|
|
|
t->verdict < -NF_MAX_VERDICT - 1)
|
2006-12-05 14:43:50 -07:00
|
|
|
return 0;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Return: backtrack through the last
|
|
|
|
big jump. */
|
|
|
|
do {
|
2007-11-19 19:53:30 -07:00
|
|
|
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
|
2005-04-16 16:20:36 -06:00
|
|
|
oldpos = pos;
|
|
|
|
pos = e->counters.pcnt;
|
|
|
|
e->counters.pcnt = 0;
|
|
|
|
|
|
|
|
/* We're at the start. */
|
|
|
|
if (pos == oldpos)
|
|
|
|
goto next;
|
|
|
|
|
2017-03-28 13:05:16 -06:00
|
|
|
e = entry0 + pos;
|
2005-04-16 16:20:36 -06:00
|
|
|
} while (oldpos == pos + e->next_offset);
|
|
|
|
|
|
|
|
/* Move along one */
|
|
|
|
size = e->next_offset;
|
2017-03-28 13:05:16 -06:00
|
|
|
e = entry0 + pos + size;
|
2016-04-01 06:17:21 -06:00
|
|
|
if (pos + size >= newinfo->size)
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
e->counters.pcnt = pos;
|
|
|
|
pos += size;
|
|
|
|
} else {
|
|
|
|
int newpos = t->verdict;
|
|
|
|
|
|
|
|
if (strcmp(t->target.u.user.name,
|
2010-10-13 08:28:00 -06:00
|
|
|
XT_STANDARD_TARGET) == 0 &&
|
2009-11-23 15:17:06 -07:00
|
|
|
newpos >= 0) {
|
2005-04-16 16:20:36 -06:00
|
|
|
/* This a jump; chase it. */
|
2016-07-14 09:51:26 -06:00
|
|
|
if (!xt_find_jump_offset(offsets, newpos,
|
|
|
|
newinfo->number))
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
} else {
|
|
|
|
/* ... this is a fallthru */
|
|
|
|
newpos = pos + e->next_offset;
|
2016-04-01 06:17:21 -06:00
|
|
|
if (newpos >= newinfo->size)
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
2017-03-28 13:05:16 -06:00
|
|
|
e = entry0 + newpos;
|
2005-04-16 16:20:36 -06:00
|
|
|
e->counters.pcnt = pos;
|
|
|
|
pos = newpos;
|
|
|
|
}
|
|
|
|
}
|
2016-05-03 05:54:23 -06:00
|
|
|
next: ;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2010-10-13 08:11:22 -06:00
|
|
|
static void cleanup_match(struct xt_entry_match *m, struct net *net)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2008-10-08 03:35:19 -06:00
|
|
|
struct xt_mtdtor_param par;
|
|
|
|
|
2010-01-18 00:25:47 -07:00
|
|
|
par.net = net;
|
2008-10-08 03:35:19 -06:00
|
|
|
par.match = m->u.kernel.match;
|
|
|
|
par.matchinfo = m->data;
|
2008-10-08 03:35:20 -06:00
|
|
|
par.family = NFPROTO_IPV4;
|
2008-10-08 03:35:19 -06:00
|
|
|
if (par.match->destroy != NULL)
|
|
|
|
par.match->destroy(&par);
|
|
|
|
module_put(par.match->me);
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2010-10-13 08:11:22 -06:00
|
|
|
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
|
2006-12-12 01:29:26 -07:00
|
|
|
{
|
2008-10-08 03:35:18 -06:00
|
|
|
const struct ipt_ip *ip = par->entryinfo;
|
2006-12-12 01:29:26 -07:00
|
|
|
|
2008-10-08 03:35:18 -06:00
|
|
|
par->match = m->u.kernel.match;
|
|
|
|
par->matchinfo = m->data;
|
|
|
|
|
2016-05-03 05:54:23 -06:00
|
|
|
return xt_check_match(par, m->u.match_size - sizeof(*m),
|
|
|
|
ip->proto, ip->invflags & IPT_INV_PROTO);
|
2006-12-12 01:29:26 -07:00
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2010-10-13 08:11:22 -06:00
|
|
|
find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2007-02-07 16:11:19 -07:00
|
|
|
struct xt_match *match;
|
2006-03-20 19:00:36 -07:00
|
|
|
int ret;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-07-10 11:27:47 -06:00
|
|
|
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
|
|
|
|
m->u.user.revision);
|
2016-05-03 05:54:23 -06:00
|
|
|
if (IS_ERR(match))
|
2009-07-10 11:27:47 -06:00
|
|
|
return PTR_ERR(match);
|
2005-04-16 16:20:36 -06:00
|
|
|
m->u.kernel.match = match;
|
|
|
|
|
2010-02-24 10:35:37 -07:00
|
|
|
ret = check_match(m, par);
|
2006-03-20 19:00:36 -07:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
return 0;
|
2006-03-20 19:00:36 -07:00
|
|
|
err:
|
|
|
|
module_put(m->u.kernel.match->me);
|
|
|
|
return ret;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2010-02-03 05:45:12 -07:00
|
|
|
static int check_target(struct ipt_entry *e, struct net *net, const char *name)
|
2006-12-12 01:29:26 -07:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t = ipt_get_target(e);
|
2008-10-08 03:35:19 -06:00
|
|
|
struct xt_tgchk_param par = {
|
2010-02-03 05:45:12 -07:00
|
|
|
.net = net,
|
2008-10-08 03:35:19 -06:00
|
|
|
.table = name,
|
|
|
|
.entryinfo = e,
|
|
|
|
.target = t->u.kernel.target,
|
|
|
|
.targinfo = t->data,
|
|
|
|
.hook_mask = e->comefrom,
|
2008-10-08 03:35:20 -06:00
|
|
|
.family = NFPROTO_IPV4,
|
2008-10-08 03:35:19 -06:00
|
|
|
};
|
2006-12-12 01:29:26 -07:00
|
|
|
|
2016-05-03 05:54:23 -06:00
|
|
|
return xt_check_target(&par, t->u.target_size - sizeof(*t),
|
|
|
|
e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
|
2006-12-12 01:29:26 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2010-01-18 00:21:13 -07:00
|
|
|
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
|
2016-11-22 06:44:19 -07:00
|
|
|
unsigned int size,
|
|
|
|
struct xt_percpu_counter_alloc_state *alloc_state)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2007-02-07 16:11:19 -07:00
|
|
|
struct xt_target *target;
|
2005-04-16 16:20:36 -06:00
|
|
|
int ret;
|
|
|
|
unsigned int j;
|
2008-10-08 03:35:18 -06:00
|
|
|
struct xt_mtchk_param mtpar;
|
2010-02-24 10:34:48 -07:00
|
|
|
struct xt_entry_match *ematch;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-11-22 06:44:19 -07:00
|
|
|
if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
|
2015-06-10 17:34:54 -06:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
j = 0;
|
2010-01-18 00:21:13 -07:00
|
|
|
mtpar.net = net;
|
2008-10-08 03:35:18 -06:00
|
|
|
mtpar.table = name;
|
|
|
|
mtpar.entryinfo = &e->ip;
|
|
|
|
mtpar.hook_mask = e->comefrom;
|
2008-10-08 03:35:20 -06:00
|
|
|
mtpar.family = NFPROTO_IPV4;
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
2010-02-24 10:35:37 -07:00
|
|
|
ret = find_check_match(ematch, &mtpar);
|
2010-02-24 10:34:48 -07:00
|
|
|
if (ret != 0)
|
2010-02-24 10:35:37 -07:00
|
|
|
goto cleanup_matches;
|
|
|
|
++j;
|
2010-02-24 10:34:48 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
t = ipt_get_target(e);
|
2009-07-10 10:55:11 -06:00
|
|
|
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
|
|
|
|
t->u.user.revision);
|
|
|
|
if (IS_ERR(target)) {
|
|
|
|
ret = PTR_ERR(target);
|
2005-04-16 16:20:36 -06:00
|
|
|
goto cleanup_matches;
|
|
|
|
}
|
|
|
|
t->u.kernel.target = target;
|
|
|
|
|
2010-02-03 05:45:12 -07:00
|
|
|
ret = check_target(e, net, name);
|
2006-03-20 19:00:36 -07:00
|
|
|
if (ret)
|
|
|
|
goto err;
|
2015-06-10 17:34:54 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
return 0;
|
2006-03-20 19:00:36 -07:00
|
|
|
err:
|
|
|
|
module_put(t->u.kernel.target->me);
|
2005-04-16 16:20:36 -06:00
|
|
|
cleanup_matches:
|
2010-02-24 10:35:37 -07:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
|
|
|
if (j-- == 0)
|
2010-02-24 10:34:48 -07:00
|
|
|
break;
|
2010-02-24 10:35:37 -07:00
|
|
|
cleanup_match(ematch, net);
|
|
|
|
}
|
2015-06-10 17:34:54 -06:00
|
|
|
|
2016-11-22 06:44:17 -07:00
|
|
|
xt_percpu_counter_free(&e->counters);
|
2015-06-10 17:34:54 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
static bool check_underflow(const struct ipt_entry *e)
|
2009-07-18 07:22:30 -06:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
const struct xt_entry_target *t;
|
2009-07-18 07:22:30 -06:00
|
|
|
unsigned int verdict;
|
|
|
|
|
2016-03-22 11:02:52 -06:00
|
|
|
if (!unconditional(e))
|
2009-07-18 07:22:30 -06:00
|
|
|
return false;
|
2009-06-25 23:51:59 -06:00
|
|
|
t = ipt_get_target_c(e);
|
2009-07-18 07:22:30 -06:00
|
|
|
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
|
|
|
|
return false;
|
2010-10-13 08:11:22 -06:00
|
|
|
verdict = ((struct xt_standard_target *)t)->verdict;
|
2009-07-18 07:22:30 -06:00
|
|
|
verdict = -verdict - 1;
|
|
|
|
return verdict == NF_DROP || verdict == NF_ACCEPT;
|
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2005-04-16 16:20:36 -06:00
|
|
|
check_entry_size_and_hooks(struct ipt_entry *e,
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
struct xt_table_info *newinfo,
|
2009-06-25 23:51:59 -06:00
|
|
|
const unsigned char *base,
|
|
|
|
const unsigned char *limit,
|
2005-04-16 16:20:36 -06:00
|
|
|
const unsigned int *hook_entries,
|
|
|
|
const unsigned int *underflows,
|
2010-02-24 10:33:43 -07:00
|
|
|
unsigned int valid_hooks)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
|
|
|
unsigned int h;
|
2016-03-22 11:02:49 -06:00
|
|
|
int err;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2009-11-23 15:17:06 -07:00
|
|
|
if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
|
2016-03-22 11:02:50 -06:00
|
|
|
(unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
|
2016-05-03 05:54:23 -06:00
|
|
|
(unsigned char *)e + e->next_offset > limit)
|
2005-04-16 16:20:36 -06:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (e->next_offset
|
2016-05-03 05:54:23 -06:00
|
|
|
< sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
|
2005-04-16 16:20:36 -06:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-04-01 06:17:24 -06:00
|
|
|
if (!ip_checkentry(&e->ip))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-04-01 06:17:28 -06:00
|
|
|
err = xt_check_entry_offsets(e, e->elems, e->target_offset,
|
|
|
|
e->next_offset);
|
2016-03-22 11:02:49 -06:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Check hooks & underflows */
|
2007-11-19 19:53:30 -07:00
|
|
|
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
|
2009-07-18 06:52:58 -06:00
|
|
|
if (!(valid_hooks & (1 << h)))
|
|
|
|
continue;
|
2005-04-16 16:20:36 -06:00
|
|
|
if ((unsigned char *)e - base == hook_entries[h])
|
|
|
|
newinfo->hook_entry[h] = hook_entries[h];
|
2009-07-09 14:54:53 -06:00
|
|
|
if ((unsigned char *)e - base == underflows[h]) {
|
2016-05-03 05:54:23 -06:00
|
|
|
if (!check_underflow(e))
|
2009-07-09 14:54:53 -06:00
|
|
|
return -EINVAL;
|
2016-05-03 05:54:23 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
newinfo->underflow[h] = underflows[h];
|
2009-07-09 14:54:53 -06:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Clear counters and comefrom */
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
e->counters = ((struct xt_counters) { 0, 0 });
|
2005-04-16 16:20:36 -06:00
|
|
|
e->comefrom = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-02-24 10:33:43 -07:00
|
|
|
static void
|
|
|
|
cleanup_entry(struct ipt_entry *e, struct net *net)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2008-10-08 03:35:19 -06:00
|
|
|
struct xt_tgdtor_param par;
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2010-02-24 10:34:48 -07:00
|
|
|
struct xt_entry_match *ematch;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Cleanup all matches */
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e)
|
2010-02-24 10:35:37 -07:00
|
|
|
cleanup_match(ematch, net);
|
2005-04-16 16:20:36 -06:00
|
|
|
t = ipt_get_target(e);
|
2008-10-08 03:35:19 -06:00
|
|
|
|
2010-02-03 05:45:12 -07:00
|
|
|
par.net = net;
|
2008-10-08 03:35:19 -06:00
|
|
|
par.target = t->u.kernel.target;
|
|
|
|
par.targinfo = t->data;
|
2008-10-08 03:35:20 -06:00
|
|
|
par.family = NFPROTO_IPV4;
|
2008-10-08 03:35:19 -06:00
|
|
|
if (par.target->destroy != NULL)
|
|
|
|
par.target->destroy(&par);
|
|
|
|
module_put(par.target->me);
|
2016-11-22 06:44:17 -07:00
|
|
|
xt_percpu_counter_free(&e->counters);
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Checks and translates the user-supplied table segment (held in
|
|
|
|
newinfo) */
|
|
|
|
static int
|
2010-02-24 10:36:04 -07:00
|
|
|
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
|
2015-10-14 16:17:06 -06:00
|
|
|
const struct ipt_replace *repl)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2016-11-22 06:44:19 -07:00
|
|
|
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2016-07-14 09:51:26 -06:00
|
|
|
unsigned int *offsets;
|
2005-04-16 16:20:36 -06:00
|
|
|
unsigned int i;
|
2010-02-24 10:32:59 -07:00
|
|
|
int ret = 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2010-02-24 10:36:04 -07:00
|
|
|
newinfo->size = repl->size;
|
|
|
|
newinfo->number = repl->num_entries;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Init all hooks to impossible value. */
|
2007-11-19 19:53:30 -07:00
|
|
|
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
|
2005-04-16 16:20:36 -06:00
|
|
|
newinfo->hook_entry[i] = 0xFFFFFFFF;
|
|
|
|
newinfo->underflow[i] = 0xFFFFFFFF;
|
|
|
|
}
|
|
|
|
|
2016-07-14 09:51:26 -06:00
|
|
|
offsets = xt_alloc_entry_offsets(newinfo->number);
|
|
|
|
if (!offsets)
|
|
|
|
return -ENOMEM;
|
2005-04-16 16:20:36 -06:00
|
|
|
i = 0;
|
|
|
|
/* Walk through entries, checking offsets. */
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, entry0, newinfo->size) {
|
|
|
|
ret = check_entry_size_and_hooks(iter, newinfo, entry0,
|
2010-02-26 09:53:31 -07:00
|
|
|
entry0 + repl->size,
|
|
|
|
repl->hook_entry,
|
|
|
|
repl->underflow,
|
|
|
|
repl->valid_hooks);
|
2010-02-24 10:32:59 -07:00
|
|
|
if (ret != 0)
|
2016-07-14 09:51:26 -06:00
|
|
|
goto out_free;
|
|
|
|
if (i < repl->num_entries)
|
|
|
|
offsets[i] = (void *)iter - entry0;
|
2010-02-24 10:33:43 -07:00
|
|
|
++i;
|
2015-08-26 15:20:51 -06:00
|
|
|
if (strcmp(ipt_get_target(iter)->u.user.name,
|
|
|
|
XT_ERROR_TARGET) == 0)
|
|
|
|
++newinfo->stacksize;
|
2010-02-24 10:32:59 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-07-14 09:51:26 -06:00
|
|
|
ret = -EINVAL;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (i != repl->num_entries)
|
2016-07-14 09:51:26 -06:00
|
|
|
goto out_free;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Check hooks all assigned */
|
2007-11-19 19:53:30 -07:00
|
|
|
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Only hooks which are valid */
|
2010-02-24 10:36:04 -07:00
|
|
|
if (!(repl->valid_hooks & (1 << i)))
|
2005-04-16 16:20:36 -06:00
|
|
|
continue;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
|
2016-07-14 09:51:26 -06:00
|
|
|
goto out_free;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (newinfo->underflow[i] == 0xFFFFFFFF)
|
2016-07-14 09:51:26 -06:00
|
|
|
goto out_free;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2016-07-14 09:51:26 -06:00
|
|
|
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
|
|
|
|
ret = -ELOOP;
|
|
|
|
goto out_free;
|
|
|
|
}
|
|
|
|
kvfree(offsets);
|
2006-12-05 14:43:50 -07:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Finally, each sanity check must pass */
|
|
|
|
i = 0;
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, entry0, newinfo->size) {
|
2016-11-22 06:44:19 -07:00
|
|
|
ret = find_check_entry(iter, net, repl->name, repl->size,
|
|
|
|
&alloc_state);
|
2010-02-24 10:32:59 -07:00
|
|
|
if (ret != 0)
|
|
|
|
break;
|
2010-02-24 10:33:43 -07:00
|
|
|
++i;
|
2010-02-24 10:32:59 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-12-05 14:43:50 -07:00
|
|
|
if (ret != 0) {
|
2010-02-24 10:33:43 -07:00
|
|
|
xt_entry_foreach(iter, entry0, newinfo->size) {
|
|
|
|
if (i-- == 0)
|
2010-02-24 10:32:59 -07:00
|
|
|
break;
|
2010-02-24 10:33:43 -07:00
|
|
|
cleanup_entry(iter, net);
|
|
|
|
}
|
2006-12-05 14:43:50 -07:00
|
|
|
return ret;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2016-07-14 09:51:26 -06:00
|
|
|
return ret;
|
|
|
|
out_free:
|
|
|
|
kvfree(offsets);
|
2005-04-16 16:20:36 -06:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
get_counters(const struct xt_table_info *t,
|
|
|
|
struct xt_counters counters[])
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2005-04-16 16:20:36 -06:00
|
|
|
unsigned int cpu;
|
|
|
|
unsigned int i;
|
|
|
|
|
2006-04-10 23:52:50 -06:00
|
|
|
for_each_possible_cpu(cpu) {
|
2011-04-04 09:04:03 -06:00
|
|
|
seqcount_t *s = &per_cpu(xt_recseq, cpu);
|
2011-01-10 12:11:38 -07:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
i = 0;
|
2015-06-10 17:34:55 -06:00
|
|
|
xt_entry_foreach(iter, t->entries, t->size) {
|
2015-06-10 17:34:54 -06:00
|
|
|
struct xt_counters *tmp;
|
2011-01-10 12:11:38 -07:00
|
|
|
u64 bcnt, pcnt;
|
|
|
|
unsigned int start;
|
|
|
|
|
2015-06-10 17:34:54 -06:00
|
|
|
tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
|
2011-01-10 12:11:38 -07:00
|
|
|
do {
|
2011-04-04 09:04:03 -06:00
|
|
|
start = read_seqcount_begin(s);
|
2015-06-10 17:34:54 -06:00
|
|
|
bcnt = tmp->bcnt;
|
|
|
|
pcnt = tmp->pcnt;
|
2011-04-04 09:04:03 -06:00
|
|
|
} while (read_seqcount_retry(s, start));
|
2011-01-10 12:11:38 -07:00
|
|
|
|
|
|
|
ADD_COUNTER(counters[i], bcnt, pcnt);
|
2010-02-24 10:33:43 -07:00
|
|
|
++i; /* macro does multi eval of i */
|
2017-09-01 14:41:03 -06:00
|
|
|
cond_resched();
|
2010-02-24 10:33:43 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
2009-02-20 02:35:32 -07:00
|
|
|
}
|
|
|
|
|
2017-10-11 17:13:51 -06:00
|
|
|
static void get_old_counters(const struct xt_table_info *t,
|
|
|
|
struct xt_counters counters[])
|
|
|
|
{
|
|
|
|
struct ipt_entry *iter;
|
|
|
|
unsigned int cpu, i;
|
|
|
|
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
i = 0;
|
|
|
|
xt_entry_foreach(iter, t->entries, t->size) {
|
|
|
|
const struct xt_counters *tmp;
|
|
|
|
|
|
|
|
tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
|
|
|
|
ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
|
|
|
|
++i; /* macro does multi eval of i */
|
|
|
|
}
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
static struct xt_counters *alloc_counters(const struct xt_table *table)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2006-04-01 03:25:19 -07:00
|
|
|
unsigned int countersize;
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
struct xt_counters *counters;
|
2009-06-25 23:51:59 -06:00
|
|
|
const struct xt_table_info *private = table->private;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* We need atomic snapshot of counters: rest doesn't change
|
|
|
|
(other than comefrom, which userspace doesn't care
|
|
|
|
about). */
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
countersize = sizeof(struct xt_counters) * private->number;
|
2011-01-10 12:11:38 -07:00
|
|
|
counters = vzalloc(countersize);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
if (counters == NULL)
|
2009-04-28 23:36:33 -06:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2009-02-20 02:35:32 -07:00
|
|
|
|
2009-04-28 23:36:33 -06:00
|
|
|
get_counters(private, counters);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
return counters;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
copy_entries_to_user(unsigned int total_size,
|
2009-06-25 23:51:59 -06:00
|
|
|
const struct xt_table *table,
|
2006-04-01 03:25:19 -07:00
|
|
|
void __user *userptr)
|
|
|
|
{
|
|
|
|
unsigned int off, num;
|
2009-06-25 23:51:59 -06:00
|
|
|
const struct ipt_entry *e;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct xt_counters *counters;
|
2008-04-14 03:15:35 -06:00
|
|
|
const struct xt_table_info *private = table->private;
|
2006-04-01 03:25:19 -07:00
|
|
|
int ret = 0;
|
2015-06-15 10:57:30 -06:00
|
|
|
const void *loc_cpu_entry;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
counters = alloc_counters(table);
|
|
|
|
if (IS_ERR(counters))
|
|
|
|
return PTR_ERR(counters);
|
|
|
|
|
2015-06-10 17:34:55 -06:00
|
|
|
loc_cpu_entry = private->entries;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* FIXME: use iterator macros --RR */
|
|
|
|
/* ... then go back and fix counters and names */
|
|
|
|
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
|
|
|
|
unsigned int i;
|
2010-10-13 08:11:22 -06:00
|
|
|
const struct xt_entry_match *m;
|
|
|
|
const struct xt_entry_target *t;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2017-03-28 13:05:16 -06:00
|
|
|
e = loc_cpu_entry + off;
|
2017-01-02 15:19:41 -07:00
|
|
|
if (copy_to_user(userptr + off, e, sizeof(*e))) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_counters;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
if (copy_to_user(userptr + off
|
|
|
|
+ offsetof(struct ipt_entry, counters),
|
|
|
|
&counters[num],
|
|
|
|
sizeof(counters[num])) != 0) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_counters;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = sizeof(struct ipt_entry);
|
|
|
|
i < e->target_offset;
|
|
|
|
i += m->u.match_size) {
|
|
|
|
m = (void *)e + i;
|
|
|
|
|
2017-01-02 15:19:41 -07:00
|
|
|
if (xt_match_to_user(m, userptr + off + i)) {
|
2005-04-16 16:20:36 -06:00
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_counters;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
t = ipt_get_target_c(e);
|
2017-01-02 15:19:41 -07:00
|
|
|
if (xt_target_to_user(t, userptr + off + e->target_offset)) {
|
2005-04-16 16:20:36 -06:00
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_counters;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free_counters:
|
|
|
|
vfree(counters);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
#ifdef CONFIG_COMPAT
|
2009-06-26 00:23:19 -06:00
|
|
|
static void compat_standard_from_user(void *dst, const void *src)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2006-09-20 13:05:37 -06:00
|
|
|
int v = *(compat_int_t *)src;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2006-09-20 13:05:37 -06:00
|
|
|
if (v > 0)
|
2007-12-17 22:47:48 -07:00
|
|
|
v += xt_compat_calc_jump(AF_INET, v);
|
2006-09-20 13:05:37 -06:00
|
|
|
memcpy(dst, &v, sizeof(v));
|
|
|
|
}
|
2006-05-01 21:12:22 -06:00
|
|
|
|
2009-06-26 00:23:19 -06:00
|
|
|
/*
 * Translate a 64-bit standard-target verdict back to the 32-bit form
 * and copy it to userspace.  Inverse of compat_standard_from_user():
 * positive jump offsets shrink by the recorded compat delta.
 * Returns 0 on success, -EFAULT on a failed user copy.
 */
static int compat_standard_to_user(void __user *dst, const void *src)
{
	compat_int_t verdict = *(int *)src;

	if (verdict > 0)	/* jump target, not a builtin verdict */
		verdict -= xt_compat_calc_jump(AF_INET, verdict);
	return copy_to_user(dst, &verdict, sizeof(verdict)) ? -EFAULT : 0;
}
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
static int compat_calc_entry(const struct ipt_entry *e,
|
2007-12-17 22:46:15 -07:00
|
|
|
const struct xt_table_info *info,
|
2009-06-25 23:51:59 -06:00
|
|
|
const void *base, struct xt_table_info *newinfo)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2010-02-24 10:34:48 -07:00
|
|
|
const struct xt_entry_match *ematch;
|
2010-10-13 08:11:22 -06:00
|
|
|
const struct xt_entry_target *t;
|
2007-01-04 13:14:41 -07:00
|
|
|
unsigned int entry_offset;
|
2006-04-01 03:25:19 -07:00
|
|
|
int off, i, ret;
|
|
|
|
|
2007-12-17 22:47:14 -07:00
|
|
|
off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
|
2006-04-01 03:25:19 -07:00
|
|
|
entry_offset = (void *)e - base;
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e)
|
2010-02-24 10:35:37 -07:00
|
|
|
off += xt_compat_match_offset(ematch->u.kernel.match);
|
2009-06-25 23:51:59 -06:00
|
|
|
t = ipt_get_target_c(e);
|
2006-09-20 13:05:37 -06:00
|
|
|
off += xt_compat_target_offset(t->u.kernel.target);
|
2006-04-01 03:25:19 -07:00
|
|
|
newinfo->size -= off;
|
2007-12-17 22:47:48 -07:00
|
|
|
ret = xt_compat_add_offset(AF_INET, entry_offset, off);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2007-11-19 19:53:30 -07:00
|
|
|
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
|
2007-12-17 22:46:15 -07:00
|
|
|
if (info->hook_entry[i] &&
|
|
|
|
(e < (struct ipt_entry *)(base + info->hook_entry[i])))
|
2006-04-01 03:25:19 -07:00
|
|
|
newinfo->hook_entry[i] -= off;
|
2007-12-17 22:46:15 -07:00
|
|
|
if (info->underflow[i] &&
|
|
|
|
(e < (struct ipt_entry *)(base + info->underflow[i])))
|
2006-04-01 03:25:19 -07:00
|
|
|
newinfo->underflow[i] -= off;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-12-05 00:24:56 -07:00
|
|
|
static int compat_table_info(const struct xt_table_info *info,
|
2007-12-17 22:46:15 -07:00
|
|
|
struct xt_table_info *newinfo)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2015-06-15 10:57:30 -06:00
|
|
|
const void *loc_cpu_entry;
|
2010-02-24 10:33:43 -07:00
|
|
|
int ret;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
if (!newinfo || !info)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2015-06-10 17:34:55 -06:00
|
|
|
/* we dont care about newinfo->entries */
|
2007-12-05 00:24:56 -07:00
|
|
|
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
|
|
|
|
newinfo->initial_entries = 0;
|
2015-06-10 17:34:55 -06:00
|
|
|
loc_cpu_entry = info->entries;
|
2010-12-18 10:35:15 -07:00
|
|
|
xt_compat_init_offsets(AF_INET, info->number);
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
|
|
|
|
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
|
|
|
|
if (ret != 0)
|
2010-02-24 10:33:43 -07:00
|
|
|
return ret;
|
2010-02-24 10:32:59 -07:00
|
|
|
}
|
2010-02-24 10:33:43 -07:00
|
|
|
return 0;
|
2006-04-01 03:25:19 -07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2009-06-25 23:51:59 -06:00
|
|
|
/*
 * IPT_SO_GET_INFO handler: copy a table's metadata (valid hooks, hook
 * entry points, underflows, rule count, blob size) to userspace.  When
 * @compat is set, sizes and offsets are first translated to the 32-bit
 * ABI layout.  Auto-loads "iptable_<name>" if the table is not yet
 * registered.  Returns 0 or a negative errno.
 */
static int get_info(struct net *net, void __user *user,
		    const int *len, int compat)
{
	char name[XT_TABLE_MAXNAMELEN];
	struct xt_table *t;
	int ret;

	if (*len != sizeof(struct ipt_getinfo))
		return -EINVAL;

	if (copy_from_user(name, user, sizeof(name)) != 0)
		return -EFAULT;

	/* untrusted userspace string: force NUL termination */
	name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
	if (compat)
		xt_compat_lock(AF_INET);
#endif
	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
				    "iptable_%s", name);
	if (t) {
		struct ipt_getinfo info;
		const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
		struct xt_table_info tmp;

		if (compat) {
			/*
			 * Recompute the metadata as a 32-bit task sees it.
			 * NOTE(review): a compat_table_info() failure is
			 * not propagated here and tmp may then be only
			 * partially filled -- confirm against upstream.
			 */
			ret = compat_table_info(private, &tmp);
			xt_compat_flush_offsets(AF_INET);
			private = &tmp;
		}
#endif
		memset(&info, 0, sizeof(info));	/* don't leak stack bytes */
		info.valid_hooks = t->valid_hooks;
		memcpy(info.hook_entry, private->hook_entry,
		       sizeof(info.hook_entry));
		memcpy(info.underflow, private->underflow,
		       sizeof(info.underflow));
		info.num_entries = private->number;
		info.size = private->size;
		/* safe: same buffer size and name was terminated above */
		strcpy(info.name, name);

		ret = copy_to_user(user, &info, *len) != 0 ? -EFAULT : 0;

		xt_table_unlock(t);
		module_put(t->me);
	} else
		ret = -ENOENT;
#ifdef CONFIG_COMPAT
	if (compat)
		xt_compat_unlock(AF_INET);
#endif
	return ret;
}
|
|
|
|
|
|
|
|
static int
|
2009-06-25 23:51:59 -06:00
|
|
|
get_entries(struct net *net, struct ipt_get_entries __user *uptr,
|
|
|
|
const int *len)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct ipt_get_entries get;
|
2007-02-07 16:12:33 -07:00
|
|
|
struct xt_table *t;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2016-05-03 05:54:23 -06:00
|
|
|
if (*len < sizeof(get))
|
2006-04-01 03:25:19 -07:00
|
|
|
return -EINVAL;
|
|
|
|
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
|
|
|
|
return -EFAULT;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (*len != sizeof(struct ipt_get_entries) + get.size)
|
2006-04-01 03:25:19 -07:00
|
|
|
return -EINVAL;
|
2016-03-24 14:29:53 -06:00
|
|
|
get.name[sizeof(get.name) - 1] = '\0';
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2008-01-31 05:03:03 -07:00
|
|
|
t = xt_find_table_lock(net, AF_INET, get.name);
|
2016-11-11 05:32:38 -07:00
|
|
|
if (t) {
|
2008-04-14 03:15:35 -06:00
|
|
|
const struct xt_table_info *private = t->private;
|
2006-04-01 03:25:19 -07:00
|
|
|
if (get.size == private->size)
|
|
|
|
ret = copy_entries_to_user(private->size,
|
|
|
|
t, uptr->entrytable);
|
2016-05-03 05:54:23 -06:00
|
|
|
else
|
2008-04-14 03:15:45 -06:00
|
|
|
ret = -EAGAIN;
|
2016-05-03 05:54:23 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
module_put(t->me);
|
|
|
|
xt_table_unlock(t);
|
|
|
|
} else
|
2016-11-11 05:32:38 -07:00
|
|
|
ret = -ENOENT;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2008-01-31 05:03:03 -07:00
|
|
|
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
|
2007-12-17 22:46:15 -07:00
|
|
|
struct xt_table_info *newinfo, unsigned int num_counters,
|
|
|
|
void __user *counters_ptr)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
|
|
|
int ret;
|
2007-02-07 16:12:33 -07:00
|
|
|
struct xt_table *t;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct xt_table_info *oldinfo;
|
|
|
|
struct xt_counters *counters;
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
ret = 0;
|
2011-01-10 12:11:38 -07:00
|
|
|
counters = vzalloc(num_counters * sizeof(struct xt_counters));
|
2006-04-01 03:25:19 -07:00
|
|
|
if (!counters) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2008-01-31 05:03:03 -07:00
|
|
|
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
|
2006-04-01 03:25:19 -07:00
|
|
|
"iptable_%s", name);
|
2016-11-11 05:32:38 -07:00
|
|
|
if (!t) {
|
|
|
|
ret = -ENOENT;
|
2006-04-01 03:25:19 -07:00
|
|
|
goto free_newinfo_counters_untrans;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* You lied! */
|
|
|
|
if (valid_hooks != t->valid_hooks) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto put_module;
|
|
|
|
}
|
|
|
|
|
|
|
|
oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
|
|
|
|
if (!oldinfo)
|
|
|
|
goto put_module;
|
|
|
|
|
|
|
|
/* Update module usage count based on number of rules */
|
|
|
|
if ((oldinfo->number > oldinfo->initial_entries) ||
|
|
|
|
(newinfo->number <= oldinfo->initial_entries))
|
|
|
|
module_put(t->me);
|
|
|
|
if ((oldinfo->number > oldinfo->initial_entries) &&
|
|
|
|
(newinfo->number <= oldinfo->initial_entries))
|
|
|
|
module_put(t->me);
|
|
|
|
|
2017-10-11 17:13:51 -06:00
|
|
|
get_old_counters(oldinfo, counters);
|
2009-04-28 23:36:33 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
/* Decrease module usage counts and free resource */
|
2015-06-10 17:34:55 -06:00
|
|
|
xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
|
2010-02-24 10:33:43 -07:00
|
|
|
cleanup_entry(iter, net);
|
2010-02-24 10:32:59 -07:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
xt_free_table_info(oldinfo);
|
|
|
|
if (copy_to_user(counters_ptr, counters,
|
2014-04-04 09:57:45 -06:00
|
|
|
sizeof(struct xt_counters) * num_counters) != 0) {
|
|
|
|
/* Silent error, can't fail, new table is already in place */
|
|
|
|
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
|
|
|
|
}
|
2006-04-01 03:25:19 -07:00
|
|
|
vfree(counters);
|
|
|
|
xt_table_unlock(t);
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
put_module:
|
|
|
|
module_put(t->me);
|
|
|
|
xt_table_unlock(t);
|
|
|
|
free_newinfo_counters_untrans:
|
|
|
|
vfree(counters);
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2009-06-25 23:51:59 -06:00
|
|
|
do_replace(struct net *net, const void __user *user, unsigned int len)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct ipt_replace tmp;
|
|
|
|
struct xt_table_info *newinfo;
|
|
|
|
void *loc_cpu_entry;
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* overflow check */
|
|
|
|
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
|
|
|
|
return -ENOMEM;
|
2015-05-19 18:55:17 -06:00
|
|
|
if (tmp.num_counters == 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2011-03-15 06:36:05 -06:00
|
|
|
tmp.name[sizeof(tmp.name)-1] = 0;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
newinfo = xt_alloc_table_info(tmp.size);
|
|
|
|
if (!newinfo)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-06-10 17:34:55 -06:00
|
|
|
loc_cpu_entry = newinfo->entries;
|
2006-04-01 03:25:19 -07:00
|
|
|
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
|
|
|
|
tmp.size) != 0) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_newinfo;
|
|
|
|
}
|
|
|
|
|
2010-02-24 10:36:04 -07:00
|
|
|
ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret != 0)
|
|
|
|
goto free_newinfo;
|
|
|
|
|
2008-01-31 05:03:03 -07:00
|
|
|
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
|
2007-12-17 22:46:15 -07:00
|
|
|
tmp.num_counters, tmp.counters);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret)
|
|
|
|
goto free_newinfo_untrans;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
free_newinfo_untrans:
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
|
2010-02-24 10:33:43 -07:00
|
|
|
cleanup_entry(iter, net);
|
2006-04-01 03:25:19 -07:00
|
|
|
free_newinfo:
|
|
|
|
xt_free_table_info(newinfo);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2009-06-25 23:51:59 -06:00
|
|
|
do_add_counters(struct net *net, const void __user *user,
|
2015-10-14 16:17:06 -06:00
|
|
|
unsigned int len, int compat)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2015-06-10 17:34:55 -06:00
|
|
|
unsigned int i;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct xt_counters_info tmp;
|
|
|
|
struct xt_counters *paddc;
|
2007-02-07 16:12:33 -07:00
|
|
|
struct xt_table *t;
|
2008-04-14 03:15:35 -06:00
|
|
|
const struct xt_table_info *private;
|
2006-04-01 03:25:19 -07:00
|
|
|
int ret = 0;
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2011-04-04 09:04:03 -06:00
|
|
|
unsigned int addend;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2016-04-01 07:37:59 -06:00
|
|
|
paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
|
|
|
|
if (IS_ERR(paddc))
|
|
|
|
return PTR_ERR(paddc);
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2016-04-01 07:37:59 -06:00
|
|
|
t = xt_find_table_lock(net, AF_INET, tmp.name);
|
2016-11-11 05:32:38 -07:00
|
|
|
if (!t) {
|
|
|
|
ret = -ENOENT;
|
2006-04-01 03:25:19 -07:00
|
|
|
goto free;
|
|
|
|
}
|
|
|
|
|
2009-04-28 23:36:33 -06:00
|
|
|
local_bh_disable();
|
2006-04-01 03:25:19 -07:00
|
|
|
private = t->private;
|
2016-04-01 07:37:59 -06:00
|
|
|
if (private->number != tmp.num_counters) {
|
2006-04-01 03:25:19 -07:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto unlock_up_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = 0;
|
2011-04-04 09:04:03 -06:00
|
|
|
addend = xt_write_recseq_begin();
|
2015-06-10 17:34:55 -06:00
|
|
|
xt_entry_foreach(iter, private->entries, private->size) {
|
2015-06-10 17:34:54 -06:00
|
|
|
struct xt_counters *tmp;
|
|
|
|
|
|
|
|
tmp = xt_get_this_cpu_counter(&iter->counters);
|
|
|
|
ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
|
2010-02-24 10:33:43 -07:00
|
|
|
++i;
|
|
|
|
}
|
2011-04-04 09:04:03 -06:00
|
|
|
xt_write_recseq_end(addend);
|
2006-04-01 03:25:19 -07:00
|
|
|
unlock_up_free:
|
2009-04-28 23:36:33 -06:00
|
|
|
local_bh_enable();
|
2006-04-01 03:25:19 -07:00
|
|
|
xt_table_unlock(t);
|
|
|
|
module_put(t->me);
|
|
|
|
free:
|
|
|
|
vfree(paddc);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
struct compat_ipt_replace {
|
2010-10-13 07:56:56 -06:00
|
|
|
char name[XT_TABLE_MAXNAMELEN];
|
2006-04-01 03:25:19 -07:00
|
|
|
u32 valid_hooks;
|
|
|
|
u32 num_entries;
|
|
|
|
u32 size;
|
2007-11-19 19:53:30 -07:00
|
|
|
u32 hook_entry[NF_INET_NUMHOOKS];
|
|
|
|
u32 underflow[NF_INET_NUMHOOKS];
|
2006-04-01 03:25:19 -07:00
|
|
|
u32 num_counters;
|
2010-10-13 08:11:22 -06:00
|
|
|
compat_uptr_t counters; /* struct xt_counters * */
|
2006-04-01 03:25:19 -07:00
|
|
|
struct compat_ipt_entry entries[0];
|
|
|
|
};
|
|
|
|
|
2007-12-12 11:35:16 -07:00
|
|
|
static int
|
|
|
|
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
|
2008-01-31 05:10:18 -07:00
|
|
|
unsigned int *size, struct xt_counters *counters,
|
2010-02-24 10:33:43 -07:00
|
|
|
unsigned int i)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct compat_ipt_entry __user *ce;
|
|
|
|
u_int16_t target_offset, next_offset;
|
|
|
|
compat_uint_t origsize;
|
2010-02-24 10:34:48 -07:00
|
|
|
const struct xt_entry_match *ematch;
|
|
|
|
int ret = 0;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
origsize = *size;
|
2017-03-28 13:05:16 -06:00
|
|
|
ce = *dstptr;
|
2010-02-24 10:33:43 -07:00
|
|
|
if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
|
|
|
|
copy_to_user(&ce->counters, &counters[i],
|
|
|
|
sizeof(counters[i])) != 0)
|
|
|
|
return -EFAULT;
|
2007-12-12 11:35:16 -07:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
*dstptr += sizeof(struct compat_ipt_entry);
|
2007-12-17 22:47:14 -07:00
|
|
|
*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
|
|
|
|
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
|
|
|
ret = xt_compat_match_to_user(ematch, dstptr, size);
|
|
|
|
if (ret != 0)
|
2010-02-24 10:35:37 -07:00
|
|
|
return ret;
|
2010-02-24 10:34:48 -07:00
|
|
|
}
|
2006-04-01 03:25:19 -07:00
|
|
|
target_offset = e->target_offset - (origsize - *size);
|
|
|
|
t = ipt_get_target(e);
|
2006-09-20 13:05:37 -06:00
|
|
|
ret = xt_compat_target_to_user(t, dstptr, size);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret)
|
2010-02-24 10:33:43 -07:00
|
|
|
return ret;
|
2006-04-01 03:25:19 -07:00
|
|
|
next_offset = e->next_offset - (origsize - *size);
|
2010-02-24 10:33:43 -07:00
|
|
|
if (put_user(target_offset, &ce->target_offset) != 0 ||
|
|
|
|
put_user(next_offset, &ce->next_offset) != 0)
|
|
|
|
return -EFAULT;
|
2006-04-01 03:25:19 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2010-10-13 08:11:22 -06:00
|
|
|
compat_find_calc_match(struct xt_entry_match *m,
|
2007-12-17 22:46:15 -07:00
|
|
|
const struct ipt_ip *ip,
|
2010-02-24 10:35:37 -07:00
|
|
|
int *size)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2007-02-07 16:11:19 -07:00
|
|
|
struct xt_match *match;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2009-07-10 11:27:47 -06:00
|
|
|
match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
|
|
|
|
m->u.user.revision);
|
2016-05-03 05:54:23 -06:00
|
|
|
if (IS_ERR(match))
|
2009-07-10 11:27:47 -06:00
|
|
|
return PTR_ERR(match);
|
2016-05-03 05:54:23 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
m->u.kernel.match = match;
|
2006-09-20 13:05:37 -06:00
|
|
|
*size += xt_compat_match_offset(match);
|
2007-06-05 13:56:09 -06:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-02-24 10:33:43 -07:00
|
|
|
static void compat_release_entry(struct compat_ipt_entry *e)
|
2007-06-05 13:56:09 -06:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2010-02-24 10:34:48 -07:00
|
|
|
struct xt_entry_match *ematch;
|
2007-06-05 13:56:09 -06:00
|
|
|
|
|
|
|
/* Cleanup all matches */
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e)
|
2010-02-24 10:35:37 -07:00
|
|
|
module_put(ematch->u.kernel.match->me);
|
2007-12-17 22:47:32 -07:00
|
|
|
t = compat_ipt_get_target(e);
|
2007-06-05 13:56:09 -06:00
|
|
|
module_put(t->u.kernel.target->me);
|
|
|
|
}
|
|
|
|
|
2008-01-15 00:44:05 -07:00
|
|
|
static int
|
2007-12-17 22:47:32 -07:00
|
|
|
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
|
2007-12-17 22:46:15 -07:00
|
|
|
struct xt_table_info *newinfo,
|
|
|
|
unsigned int *size,
|
2009-06-25 23:51:59 -06:00
|
|
|
const unsigned char *base,
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
const unsigned char *limit)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2010-02-24 10:34:48 -07:00
|
|
|
struct xt_entry_match *ematch;
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2007-02-07 16:11:19 -07:00
|
|
|
struct xt_target *target;
|
2007-01-04 13:14:41 -07:00
|
|
|
unsigned int entry_offset;
|
2008-01-31 05:10:18 -07:00
|
|
|
unsigned int j;
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
int ret, off;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2009-11-23 15:17:06 -07:00
|
|
|
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
|
2016-03-22 11:02:50 -06:00
|
|
|
(unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
|
2016-05-03 05:54:23 -06:00
|
|
|
(unsigned char *)e + e->next_offset > limit)
|
2006-04-01 03:25:19 -07:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (e->next_offset < sizeof(struct compat_ipt_entry) +
|
2016-05-03 05:54:23 -06:00
|
|
|
sizeof(struct compat_xt_entry_target))
|
2006-04-01 03:25:19 -07:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-04-01 06:17:24 -06:00
|
|
|
if (!ip_checkentry(&e->ip))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-04-01 06:17:28 -06:00
|
|
|
ret = xt_compat_check_entry_offsets(e, e->elems,
|
2016-04-01 06:17:26 -06:00
|
|
|
e->target_offset, e->next_offset);
|
2006-12-12 01:29:26 -07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
[NETFILTER]: Missed and reordered checks in {arp,ip,ip6}_tables
There is a number of issues in parsing user-provided table in
translate_table(). Malicious user with CAP_NET_ADMIN may crash system by
passing special-crafted table to the *_tables.
The first issue is that mark_source_chains() function is called before entry
content checks. In case of standard target, mark_source_chains() function
uses t->verdict field in order to determine new position. But the check, that
this field leads no further, than the table end, is in check_entry(), which
is called later, than mark_source_chains().
The second issue, that there is no check that target_offset points inside
entry. If so, *_ITERATE_MATCH macro will follow further, than the entry
ends. As a result, we'll have oops or memory disclosure.
And the third issue, that there is no check that the target is completely
inside entry. Results are the same, as in previous issue.
Signed-off-by: Dmitry Mishin <dim@openvz.org>
Acked-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-10-30 16:12:55 -07:00
|
|
|
|
2007-12-17 22:47:14 -07:00
|
|
|
off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
|
2006-04-01 03:25:19 -07:00
|
|
|
entry_offset = (void *)e - (void *)base;
|
|
|
|
j = 0;
|
2010-02-24 10:34:48 -07:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
2016-04-01 06:17:30 -06:00
|
|
|
ret = compat_find_calc_match(ematch, &e->ip, &off);
|
2010-02-24 10:34:48 -07:00
|
|
|
if (ret != 0)
|
2010-02-24 10:35:37 -07:00
|
|
|
goto release_matches;
|
|
|
|
++j;
|
2010-02-24 10:34:48 -07:00
|
|
|
}
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2007-12-17 22:47:32 -07:00
|
|
|
t = compat_ipt_get_target(e);
|
2009-07-10 10:55:11 -06:00
|
|
|
target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
|
|
|
|
t->u.user.revision);
|
|
|
|
if (IS_ERR(target)) {
|
|
|
|
ret = PTR_ERR(target);
|
2007-06-05 13:56:09 -06:00
|
|
|
goto release_matches;
|
2006-04-01 03:25:19 -07:00
|
|
|
}
|
|
|
|
t->u.kernel.target = target;
|
|
|
|
|
2006-09-20 13:05:37 -06:00
|
|
|
off += xt_compat_target_offset(target);
|
2006-04-01 03:25:19 -07:00
|
|
|
*size += off;
|
2007-12-17 22:47:48 -07:00
|
|
|
ret = xt_compat_add_offset(AF_INET, entry_offset, off);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
return 0;
|
2006-09-20 13:04:08 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
out:
|
2006-09-20 13:04:08 -06:00
|
|
|
module_put(t->u.kernel.target->me);
|
2007-06-05 13:56:09 -06:00
|
|
|
release_matches:
|
2010-02-24 10:35:37 -07:00
|
|
|
xt_ematch_foreach(ematch, e) {
|
|
|
|
if (j-- == 0)
|
2010-02-24 10:34:48 -07:00
|
|
|
break;
|
2010-02-24 10:35:37 -07:00
|
|
|
module_put(ematch->u.kernel.match->me);
|
|
|
|
}
|
2006-04-01 03:25:19 -07:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-04-01 06:17:33 -06:00
|
|
|
static void
|
2007-12-17 22:47:32 -07:00
|
|
|
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
|
2016-04-01 06:17:30 -06:00
|
|
|
unsigned int *size,
|
2007-12-17 22:46:15 -07:00
|
|
|
struct xt_table_info *newinfo, unsigned char *base)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
2010-10-13 08:11:22 -06:00
|
|
|
struct xt_entry_target *t;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct ipt_entry *de;
|
|
|
|
unsigned int origsize;
|
2016-04-01 06:17:33 -06:00
|
|
|
int h;
|
2010-02-24 10:34:48 -07:00
|
|
|
struct xt_entry_match *ematch;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
origsize = *size;
|
2017-03-28 13:05:16 -06:00
|
|
|
de = *dstptr;
|
2006-04-01 03:25:19 -07:00
|
|
|
memcpy(de, e, sizeof(struct ipt_entry));
|
2007-12-17 22:47:32 -07:00
|
|
|
memcpy(&de->counters, &e->counters, sizeof(e->counters));
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2007-12-17 22:47:32 -07:00
|
|
|
*dstptr += sizeof(struct ipt_entry);
|
2007-12-17 22:47:14 -07:00
|
|
|
*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
|
|
|
|
|
2016-04-01 06:17:33 -06:00
|
|
|
xt_ematch_foreach(ematch, e)
|
|
|
|
xt_compat_match_from_user(ematch, dstptr, size);
|
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
de->target_offset = e->target_offset - (origsize - *size);
|
2007-12-17 22:47:32 -07:00
|
|
|
t = compat_ipt_get_target(e);
|
2006-09-20 13:05:37 -06:00
|
|
|
xt_compat_target_from_user(t, dstptr, size);
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
de->next_offset = e->next_offset - (origsize - *size);
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
|
2007-11-19 19:53:30 -07:00
|
|
|
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
|
2006-04-01 03:25:19 -07:00
|
|
|
if ((unsigned char *)de - base < newinfo->hook_entry[h])
|
|
|
|
newinfo->hook_entry[h] -= origsize - *size;
|
|
|
|
if ((unsigned char *)de - base < newinfo->underflow[h])
|
|
|
|
newinfo->underflow[h] -= origsize - *size;
|
|
|
|
}
|
2006-12-05 14:44:07 -07:00
|
|
|
}
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
static int
|
2010-01-18 00:21:13 -07:00
|
|
|
translate_compat_table(struct net *net,
|
2007-12-17 22:46:15 -07:00
|
|
|
struct xt_table_info **pinfo,
|
|
|
|
void **pentry0,
|
2016-04-01 06:17:30 -06:00
|
|
|
const struct compat_ipt_replace *compatr)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2006-10-30 16:14:27 -07:00
|
|
|
unsigned int i, j;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct xt_table_info *newinfo, *info;
|
|
|
|
void *pos, *entry0, *entry1;
|
2010-02-24 10:32:59 -07:00
|
|
|
struct compat_ipt_entry *iter0;
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
struct ipt_replace repl;
|
2006-04-01 03:25:19 -07:00
|
|
|
unsigned int size;
|
2010-02-24 10:33:43 -07:00
|
|
|
int ret;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
info = *pinfo;
|
|
|
|
entry0 = *pentry0;
|
2016-04-01 06:17:30 -06:00
|
|
|
size = compatr->size;
|
|
|
|
info->number = compatr->num_entries;
|
2006-04-01 03:25:19 -07:00
|
|
|
|
2006-10-30 16:14:27 -07:00
|
|
|
j = 0;
|
2006-04-01 03:25:19 -07:00
|
|
|
xt_compat_lock(AF_INET);
|
2016-04-01 06:17:30 -06:00
|
|
|
xt_compat_init_offsets(AF_INET, compatr->num_entries);
|
2006-04-01 03:25:19 -07:00
|
|
|
/* Walk through entries, checking offsets. */
|
2016-04-01 06:17:30 -06:00
|
|
|
xt_entry_foreach(iter0, entry0, compatr->size) {
|
2010-02-24 10:32:59 -07:00
|
|
|
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
|
2010-02-26 09:53:31 -07:00
|
|
|
entry0,
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
entry0 + compatr->size);
|
2010-02-24 10:32:59 -07:00
|
|
|
if (ret != 0)
|
2010-02-24 10:33:43 -07:00
|
|
|
goto out_unlock;
|
|
|
|
++j;
|
2010-02-24 10:32:59 -07:00
|
|
|
}
|
2006-04-01 03:25:19 -07:00
|
|
|
|
|
|
|
ret = -EINVAL;
|
2016-05-03 05:54:23 -06:00
|
|
|
if (j != compatr->num_entries)
|
2006-04-01 03:25:19 -07:00
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
ret = -ENOMEM;
|
|
|
|
newinfo = xt_alloc_table_info(size);
|
|
|
|
if (!newinfo)
|
|
|
|
goto out_unlock;
|
|
|
|
|
2016-04-01 06:17:30 -06:00
|
|
|
newinfo->number = compatr->num_entries;
|
2007-11-19 19:53:30 -07:00
|
|
|
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
newinfo->hook_entry[i] = compatr->hook_entry[i];
|
|
|
|
newinfo->underflow[i] = compatr->underflow[i];
|
2006-04-01 03:25:19 -07:00
|
|
|
}
|
2015-06-10 17:34:55 -06:00
|
|
|
entry1 = newinfo->entries;
|
2006-04-01 03:25:19 -07:00
|
|
|
pos = entry1;
|
2016-04-01 06:17:30 -06:00
|
|
|
size = compatr->size;
|
2016-04-01 06:17:33 -06:00
|
|
|
xt_entry_foreach(iter0, entry0, compatr->size)
|
|
|
|
compat_copy_entry_from_user(iter0, &pos, &size,
|
|
|
|
newinfo, entry1);
|
|
|
|
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
/* all module references in entry0 are now gone.
|
|
|
|
* entry1/newinfo contains a 64bit ruleset that looks exactly as
|
|
|
|
* generated by 64bit userspace.
|
|
|
|
*
|
|
|
|
* Call standard translate_table() to validate all hook_entrys,
|
|
|
|
* underflows, check for loops, etc.
|
|
|
|
*/
|
2007-12-17 22:47:48 -07:00
|
|
|
xt_compat_flush_offsets(AF_INET);
|
2006-04-01 03:25:19 -07:00
|
|
|
xt_compat_unlock(AF_INET);
|
|
|
|
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
memcpy(&repl, compatr, sizeof(*compatr));
|
2006-04-01 03:25:19 -07:00
|
|
|
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
|
|
|
|
repl.hook_entry[i] = newinfo->hook_entry[i];
|
|
|
|
repl.underflow[i] = newinfo->underflow[i];
|
2007-06-05 13:56:09 -06:00
|
|
|
}
|
2006-12-05 14:44:07 -07:00
|
|
|
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
repl.num_counters = 0;
|
|
|
|
repl.counters = NULL;
|
|
|
|
repl.size = newinfo->size;
|
|
|
|
ret = translate_table(net, newinfo, entry1, &repl);
|
|
|
|
if (ret)
|
|
|
|
goto free_newinfo;
|
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
*pinfo = newinfo;
|
|
|
|
*pentry0 = entry1;
|
|
|
|
xt_free_table_info(info);
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
free_newinfo:
|
|
|
|
xt_free_table_info(newinfo);
|
netfilter: x_tables: do compat validation via translate_table
This looks like refactoring, but its also a bug fix.
Problem is that the compat path (32bit iptables, 64bit kernel) lacks a few
sanity tests that are done in the normal path.
For example, we do not check for underflows and the base chain policies.
While its possible to also add such checks to the compat path, its more
copy&pastry, for instance we cannot reuse check_underflow() helper as
e->target_offset differs in the compat case.
Other problem is that it makes auditing for validation errors harder; two
places need to be checked and kept in sync.
At a high level 32 bit compat works like this:
1- initial pass over blob:
validate match/entry offsets, bounds checking
lookup all matches and targets
do bookkeeping wrt. size delta of 32/64bit structures
assign match/target.u.kernel pointer (points at kernel
implementation, needed to access ->compatsize etc.)
2- allocate memory according to the total bookkeeping size to
contain the translated ruleset
3- second pass over original blob:
for each entry, copy the 32bit representation to the newly allocated
memory. This also does any special match translations (e.g.
adjust 32bit to 64bit longs, etc).
4- check if ruleset is free of loops (chase all jumps)
5-first pass over translated blob:
call the checkentry function of all matches and targets.
The alternative implemented by this patch is to drop steps 3&4 from the
compat process, the translation is changed into an intermediate step
rather than a full 1:1 translate_table replacement.
In the 2nd pass (step #3), change the 64bit ruleset back to a kernel
representation, i.e. put() the kernel pointer and restore ->u.user.name .
This gets us a 64bit ruleset that is in the format generated by a 64bit
iptables userspace -- we can then use translate_table() to get the
'native' sanity checks.
This has two drawbacks:
1. we re-validate all the match and target entry structure sizes even
though compat translation is supposed to never generate bogus offsets.
2. we put and then re-lookup each match and target.
THe upside is that we get all sanity tests and ruleset validations
provided by the normal path and can remove some duplicated compat code.
iptables-restore time of autogenerated ruleset with 300k chains of form
-A CHAIN0001 -m limit --limit 1/s -j CHAIN0002
-A CHAIN0002 -m limit --limit 1/s -j CHAIN0003
shows no noticeable differences in restore times:
old: 0m30.796s
new: 0m31.521s
64bit: 0m25.674s
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-04-01 06:17:34 -06:00
|
|
|
return ret;
|
|
|
|
out_unlock:
|
|
|
|
xt_compat_flush_offsets(AF_INET);
|
|
|
|
xt_compat_unlock(AF_INET);
|
2016-04-01 06:17:30 -06:00
|
|
|
xt_entry_foreach(iter0, entry0, compatr->size) {
|
2010-02-24 10:33:43 -07:00
|
|
|
if (j-- == 0)
|
2010-02-24 10:32:59 -07:00
|
|
|
break;
|
2010-02-24 10:33:43 -07:00
|
|
|
compat_release_entry(iter0);
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2008-01-31 05:03:03 -07:00
|
|
|
compat_do_replace(struct net *net, void __user *user, unsigned int len)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
|
|
|
int ret;
|
2006-04-01 03:25:19 -07:00
|
|
|
struct compat_ipt_replace tmp;
|
|
|
|
struct xt_table_info *newinfo;
|
|
|
|
void *loc_cpu_entry;
|
2010-02-24 10:32:59 -07:00
|
|
|
struct ipt_entry *iter;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
|
|
|
|
return -EFAULT;
|
|
|
|
|
2006-02-04 03:16:56 -07:00
|
|
|
/* overflow check */
|
|
|
|
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
|
|
|
|
return -ENOMEM;
|
2015-05-19 18:55:17 -06:00
|
|
|
if (tmp.num_counters == 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2011-03-15 06:36:05 -06:00
|
|
|
tmp.name[sizeof(tmp.name)-1] = 0;
|
2006-02-04 03:16:56 -07:00
|
|
|
|
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
This monster-patch tries to do the best job for unifying the data
structures and backend interfaces for the three evil clones ip_tables,
ip6_tables and arp_tables. In an ideal world we would never have
allowed this kind of copy+paste programming... but well, our world
isn't (yet?) ideal.
o introduce a new x_tables module
o {ip,arp,ip6}_tables depend on this x_tables module
o registration functions for tables, matches and targets are only
wrappers around x_tables provided functions
o all matches/targets that are used from ip_tables and ip6_tables
are now implemented as xt_FOOBAR.c files and provide module aliases
to ipt_FOOBAR and ip6t_FOOBAR
o header files for xt_matches are in include/linux/netfilter/,
include/linux/netfilter_{ipv4,ipv6} contains compatibility wrappers
around the xt_FOOBAR.h headers
Based on this patchset we're going to further unify the code,
gradually getting rid of all the layer 3 specific assumptions.
Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-12 14:30:04 -07:00
|
|
|
newinfo = xt_alloc_table_info(tmp.size);
|
2005-04-16 16:20:36 -06:00
|
|
|
if (!newinfo)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-06-10 17:34:55 -06:00
|
|
|
loc_cpu_entry = newinfo->entries;
|
2005-12-14 00:13:48 -07:00
|
|
|
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
|
2005-04-16 16:20:36 -06:00
|
|
|
tmp.size) != 0) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto free_newinfo;
|
|
|
|
}
|
|
|
|
|
2016-04-01 06:17:30 -06:00
|
|
|
ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret != 0)
|
2005-04-16 16:20:36 -06:00
|
|
|
goto free_newinfo;
|
|
|
|
|
2008-01-31 05:03:03 -07:00
|
|
|
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
|
2007-12-17 22:46:15 -07:00
|
|
|
tmp.num_counters, compat_ptr(tmp.counters));
|
2006-04-01 03:25:19 -07:00
|
|
|
if (ret)
|
|
|
|
goto free_newinfo_untrans;
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
free_newinfo_untrans:
|
2010-02-24 10:32:59 -07:00
|
|
|
xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
|
2010-02-24 10:33:43 -07:00
|
|
|
cleanup_entry(iter, net);
|
2006-04-01 03:25:19 -07:00
|
|
|
free_newinfo:
|
|
|
|
xt_free_table_info(newinfo);
|
|
|
|
return ret;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
static int
|
|
|
|
compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
|
2007-12-17 22:46:15 -07:00
|
|
|
unsigned int len)
|
2006-04-01 03:25:19 -07:00
|
|
|
{
|
|
|
|
int ret;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
net: Allow userns root to control ipv4
Allow an unpriviled user who has created a user namespace, and then
created a network namespace to effectively use the new network
namespace, by reducing capable(CAP_NET_ADMIN) and
capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns,
CAP_NET_ADMIN), or capable(net->user_ns, CAP_NET_RAW) calls.
Settings that merely control a single network device are allowed.
Either the network device is a logical network device where
restrictions make no difference or the network device is hardware NIC
that has been explicity moved from the initial network namespace.
In general policy and network stack state changes are allowed
while resource control is left unchanged.
Allow creating raw sockets.
Allow the SIOCSARP ioctl to control the arp cache.
Allow the SIOCSIFFLAG ioctl to allow setting network device flags.
Allow the SIOCSIFADDR ioctl to allow setting a netdevice ipv4 address.
Allow the SIOCSIFBRDADDR ioctl to allow setting a netdevice ipv4 broadcast address.
Allow the SIOCSIFDSTADDR ioctl to allow setting a netdevice ipv4 destination address.
Allow the SIOCSIFNETMASK ioctl to allow setting a netdevice ipv4 netmask.
Allow the SIOCADDRT and SIOCDELRT ioctls to allow adding and deleting ipv4 routes.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for
adding, changing and deleting gre tunnels.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for
adding, changing and deleting ipip tunnels.
Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls for
adding, changing and deleting ipsec virtual tunnel interfaces.
Allow setting the MRT_INIT, MRT_DONE, MRT_ADD_VIF, MRT_DEL_VIF, MRT_ADD_MFC,
MRT_DEL_MFC, MRT_ASSERT, MRT_PIM, MRT_TABLE socket options on multicast routing
sockets.
Allow setting and receiving IPOPT_CIPSO, IP_OPT_SEC, IP_OPT_SID and
arbitrary ip options.
Allow setting IP_SEC_POLICY/IP_XFRM_POLICY ipv4 socket option.
Allow setting the IP_TRANSPARENT ipv4 socket option.
Allow setting the TCP_REPAIR socket option.
Allow setting the TCP_CONGESTION socket option.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-15 20:03:05 -07:00
|
|
|
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
|
2006-04-01 03:25:19 -07:00
|
|
|
return -EPERM;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
switch (cmd) {
|
|
|
|
case IPT_SO_SET_REPLACE:
|
2008-03-25 11:26:21 -06:00
|
|
|
ret = compat_do_replace(sock_net(sk), user, len);
|
2006-04-01 03:25:19 -07:00
|
|
|
break;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
case IPT_SO_SET_ADD_COUNTERS:
|
2008-03-25 11:26:21 -06:00
|
|
|
ret = do_add_counters(sock_net(sk), user, len, 1);
|
2006-04-01 03:25:19 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-12-17 22:46:15 -07:00
|
|
|
struct compat_ipt_get_entries {
|
2010-10-13 07:56:56 -06:00
|
|
|
char name[XT_TABLE_MAXNAMELEN];
|
2006-04-01 03:25:19 -07:00
|
|
|
compat_uint_t size;
|
|
|
|
struct compat_ipt_entry entrytable[0];
|
|
|
|
};
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-12-17 22:46:15 -07:00
|
|
|
/* Dump the whole ruleset of @table to userspace in 32-bit compat layout.
 * @total_size: size of the (compat) dump area provided by userspace.
 * @userptr:    start of the userspace buffer to fill.
 * Counters are snapshotted up front so the dump is self-consistent;
 * each entry is then translated and copied one at a time.
 * Returns 0 on success or a negative errno from the first failing copy.
 */
static int
compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
			    void __user *userptr)
{
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	void __user *pos;
	unsigned int size;
	int ret = 0;
	unsigned int i = 0;
	struct ipt_entry *iter;

	/* Snapshot of the packet/byte counters; freed with vfree() below. */
	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	pos = userptr;
	size = total_size;
	/* Walk the kernel-layout entries; the helper advances @pos and
	 * decrements @size as it emits each compat-layout entry. */
	xt_entry_foreach(iter, private->entries, total_size) {
		ret = compat_copy_entry_to_user(iter, &pos,
						&size, counters, i++);
		if (ret != 0)
			break;
	}

	vfree(counters);
	return ret;
}
|
|
|
|
|
|
|
|
/* IPT_SO_GET_ENTRIES handler for 32-bit userspace on a 64-bit kernel.
 * Validates the request header, looks up the named table and copies its
 * ruleset out in compat layout.
 * Returns 0 on success, -EINVAL on malformed/short request, -EFAULT on
 * bad user pointer, -ENOENT if the table does not exist, -EAGAIN if the
 * ruleset size changed between the userspace size probe and this call.
 */
static int
compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
		   int *len)
{
	int ret;
	struct compat_ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get))
		return -EINVAL;

	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;

	/* Userspace must size the buffer exactly: header + rule blob. */
	if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
		return -EINVAL;

	/* Defensive NUL-termination of the (user-supplied) table name. */
	get.name[sizeof(get.name) - 1] = '\0';

	/* The compat offset bookkeeping is global per family; serialize. */
	xt_compat_lock(AF_INET);
	t = xt_find_table_lock(net, AF_INET, get.name);
	if (t) {
		const struct xt_table_info *private = t->private;
		struct xt_table_info info;
		ret = compat_table_info(private, &info);
		if (!ret && get.size == info.size)
			ret = compat_copy_entries_to_user(private->size,
							  t, uptr->entrytable);
		else if (!ret)
			/* Size mismatch: table was replaced meanwhile. */
			ret = -EAGAIN;

		xt_compat_flush_offsets(AF_INET);
		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = -ENOENT;

	xt_compat_unlock(AF_INET);
	return ret;
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-09-20 13:05:08 -06:00
|
|
|
/* Forward declaration: compat_do_ipt_get_ctl() below falls back to the
 * native getter for commands it does not handle itself.  Parameter
 * names kept in the prototype for readability. */
static int do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len);
|
|
|
|
|
2006-04-01 03:25:19 -07:00
|
|
|
/* Compat (32-bit userspace) getsockopt entry point.  Requires
 * CAP_NET_ADMIN in the user namespace owning the socket's netns.
 * GET_INFO/GET_ENTRIES get compat-layout handling; everything else is
 * delegated to the native do_ipt_get_ctl().
 */
static int
compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO:
		/* compat=1: report sizes in 32-bit layout. */
		ret = get_info(sock_net(sk), user, len, 1);
		break;
	case IPT_SO_GET_ENTRIES:
		ret = compat_get_entries(sock_net(sk), user, len);
		break;
	default:
		/* Layout-independent commands (e.g. revision queries). */
		ret = do_ipt_get_ctl(sk, cmd, user, len);
	}
	return ret;
}
|
2006-04-01 03:25:19 -07:00
|
|
|
#endif
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* Native setsockopt entry point for iptables.  Requires CAP_NET_ADMIN
 * in the user namespace owning the socket's netns.
 * IPT_SO_SET_REPLACE swaps in a whole new ruleset;
 * IPT_SO_SET_ADD_COUNTERS adds userspace-supplied counter deltas
 * (compat=0: native layout).
 */
static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(sock_net(sk), user, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(sock_net(sk), user, len, 0);
		break;

	default:
		ret = -EINVAL;
	}

	return ret;
}
|
|
|
|
|
|
|
|
/* Native getsockopt entry point for iptables.  Requires CAP_NET_ADMIN
 * in the user namespace owning the socket's netns.
 * Besides table info/entries dumps, handles match/target revision
 * queries, auto-loading the "ipt_<name>" module if the extension is not
 * yet registered.
 */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO:
		/* compat=0: native layout. */
		ret = get_info(sock_net(sk), user, len, 0);
		break;

	case IPT_SO_GET_ENTRIES:
		ret = get_entries(sock_net(sk), user, len);
		break;

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct xt_get_revision rev;
		int target;

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* Defensive NUL-termination of the user-supplied name. */
		rev.name[sizeof(rev.name)-1] = 0;

		/* target=1 queries a target revision, 0 a match revision. */
		if (cmd == IPT_SO_GET_REVISION_TARGET)
			target = 1;
		else
			target = 0;

		/* If the lookup fails, request the module and retry once;
		 * xt_find_revision() stores its result through &ret. */
		try_then_request_module(xt_find_revision(AF_INET, rev.name,
							 rev.revision,
							 target, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		ret = -EINVAL;
	}

	return ret;
}
|
|
|
|
|
netfilter: xtables: don't hook tables by default
delay hook registration until the table is being requested inside a
namespace.
Historically, a particular table (iptables mangle, ip6tables filter, etc)
was registered on module load.
When netns support was added to iptables only the ip/ip6tables ruleset was
made namespace aware, not the actual hook points.
This means f.e. that when ipt_filter table/module is loaded on a system,
then each namespace on that system has an (empty) iptables filter ruleset.
In other words, if a namespace sends a packet, such skb is 'caught' by
netfilter machinery and fed to hooking points for that table (i.e. INPUT,
FORWARD, etc).
Thanks to Eric Biederman, hooks are no longer global, but per namespace.
This means that we can avoid allocation of empty ruleset in a namespace and
defer hook registration until we need the functionality.
We register a tables hook entry points ONLY in the initial namespace.
When an iptables get/setockopt is issued inside a given namespace, we check
if the table is found in the per-namespace list.
If not, we attempt to find it in the initial namespace, and, if found,
create an empty default table in the requesting namespace and register the
needed hooks.
Hook points are destroyed only once namespace is deleted, there is no
'usage count' (it makes no sense since there is no 'remove table' operation
in xtables api).
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-02-25 02:08:36 -07:00
|
|
|
/* Tear down an already-unhooked table: detach it from x_tables, run the
 * destructors of every rule and free the backing xt_table_info.
 * The module ref taken for user-added rules is dropped; @table_owner is
 * cached up front because @table's storage goes away with the private
 * info.
 */
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
	struct xt_table_info *private;
	void *loc_cpu_entry;
	struct module *table_owner = table->me;
	struct ipt_entry *iter;

	private = xt_unregister_table(table);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = private->entries;
	xt_entry_foreach(iter, loc_cpu_entry, private->size)
		cleanup_entry(iter, net);
	/* Only rulesets beyond the built-in bootstrap entries pinned the
	 * owning module. */
	if (private->number > private->initial_entries)
		module_put(table_owner);
	xt_free_table_info(private);
}
|
|
|
|
|
2016-02-25 02:08:35 -07:00
|
|
|
/* Create a per-netns instance of @table seeded with the ruleset @repl,
 * register it with x_tables and attach its netfilter hooks.
 * On success, *res points at the live table and 0 is returned.  On
 * failure a negative errno is returned; if hook registration itself
 * failed, the half-registered table is torn down and *res reset to NULL.
 */
int ipt_register_table(struct net *net, const struct xt_table *table,
		       const struct ipt_replace *repl,
		       const struct nf_hook_ops *ops, struct xt_table **res)
{
	int ret;
	struct xt_table_info *newinfo;
	struct xt_table_info bootstrap = {0};
	void *loc_cpu_entry;
	struct xt_table *new_table;

	newinfo = xt_alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	/* Validate and fix up the userspace-layout ruleset in place. */
	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
	if (ret != 0)
		goto out_free;

	new_table = xt_register_table(net, table, &bootstrap, newinfo);
	if (IS_ERR(new_table)) {
		ret = PTR_ERR(new_table);
		goto out_free;
	}

	/* set res now, will see skbs right after nf_register_net_hooks */
	WRITE_ONCE(*res, new_table);

	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
	if (ret != 0) {
		__ipt_unregister_table(net, new_table);
		*res = NULL;
	}

	return ret;

out_free:
	xt_free_table_info(newinfo);
	return ret;
}
|
|
|
|
|
2016-02-25 02:08:35 -07:00
|
|
|
void ipt_unregister_table(struct net *net, struct xt_table *table,
|
|
|
|
const struct nf_hook_ops *ops)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
netfilter: xtables: don't hook tables by default
delay hook registration until the table is being requested inside a
namespace.
Historically, a particular table (iptables mangle, ip6tables filter, etc)
was registered on module load.
When netns support was added to iptables only the ip/ip6tables ruleset was
made namespace aware, not the actual hook points.
This means f.e. that when ipt_filter table/module is loaded on a system,
then each namespace on that system has an (empty) iptables filter ruleset.
In other words, if a namespace sends a packet, such skb is 'caught' by
netfilter machinery and fed to hooking points for that table (i.e. INPUT,
FORWARD, etc).
Thanks to Eric Biederman, hooks are no longer global, but per namespace.
This means that we can avoid allocation of empty ruleset in a namespace and
defer hook registration until we need the functionality.
We register a tables hook entry points ONLY in the initial namespace.
When an iptables get/setockopt is issued inside a given namespace, we check
if the table is found in the per-namespace list.
If not, we attempt to find it in the initial namespace, and, if found,
create an empty default table in the requesting namespace and register the
needed hooks.
Hook points are destroyed only once namespace is deleted, there is no
'usage count' (it makes no sense since there is no 'remove table' operation
in xtables api).
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2016-02-25 02:08:36 -07:00
|
|
|
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
|
|
|
|
__ipt_unregister_table(net, table);
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
|
2007-07-07 23:15:35 -06:00
|
|
|
/* Decide whether (@type, @code) falls inside the configured range:
 * a @test_type of 0xFF acts as a wildcard matching any ICMP type;
 * otherwise the type must match exactly and the code must lie within
 * [@min_code, @max_code].  @invert flips the verdict.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool match;

	if (test_type == 0xFF)
		match = true;
	else
		match = (type == test_type &&
			 code >= min_code && code <= max_code);

	return match ^ invert;
}
|
|
|
|
|
2007-07-07 23:15:35 -06:00
|
|
|
/* xtables match callback for "-p icmp --icmp-type".  Pulls the ICMP
 * header at the transport offset and compares type/code against the
 * rule's configured range.  Returns true on match.
 */
static bool
icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct icmphdr *ic;
	struct icmphdr _icmph;
	const struct ipt_icmp *icmpinfo = par->matchinfo;

	/* Must not be a fragment. */
	if (par->fragoff != 0)
		return false;

	/* May copy into the on-stack _icmph if the header is non-linear. */
	ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't. Hence, no choice but to drop.
		 */
		par->hotdrop = true;
		return false;
	}

	/* code[0]/code[1] are the configured min/max ICMP codes. */
	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
|
|
|
|
|
2010-03-19 10:16:42 -06:00
|
|
|
static int icmp_checkentry(const struct xt_mtchk_param *par)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2008-10-08 03:35:18 -06:00
|
|
|
const struct ipt_icmp *icmpinfo = par->matchinfo;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-03-20 19:01:14 -07:00
|
|
|
/* Must specify no unknown invflags */
|
2010-03-23 09:35:56 -06:00
|
|
|
return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2009-07-04 04:50:00 -06:00
|
|
|
/* Built-in targets every iptables ruleset relies on:
 * - the standard target (verdict stored as a plain int), with compat
 *   translation helpers for 32-bit userspace;
 * - the error target terminating each table, carrying an error name.
 */
static struct xt_target ipt_builtin_tg[] __read_mostly = {
	{
		.name             = XT_STANDARD_TARGET,
		.targetsize       = sizeof(int),
		.family           = NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
		.compatsize       = sizeof(compat_int_t),
		.compat_from_user = compat_standard_from_user,
		.compat_to_user   = compat_standard_to_user,
#endif
	},
	{
		.name             = XT_ERROR_TARGET,
		.target           = ipt_error,
		.targetsize       = XT_FUNCTION_MAXNAMELEN,
		.family           = NFPROTO_IPV4,
	},
};
|
|
|
|
|
|
|
|
/*
 * get/setsockopt interface registered with the netfilter sockopt
 * dispatcher; this is how userspace iptables talks to the kernel.
 */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf = PF_INET,
	.set_optmin = IPT_BASE_CTL,
	.set_optmax = IPT_SO_SET_MAX+1,
	.set = do_ipt_set_ctl,
#ifdef CONFIG_COMPAT
	.compat_set = compat_do_ipt_set_ctl,
#endif
	.get_optmin = IPT_BASE_CTL,
	.get_optmax = IPT_SO_GET_MAX+1,
	.get = do_ipt_get_ctl,
#ifdef CONFIG_COMPAT
	.compat_get = compat_do_ipt_get_ctl,
#endif
	/* .owner lets nf_sockopt pin this module with a reference for the
	 * duration of each set/get call, so module removal cannot deadlock
	 * against an in-flight sockopt (see the nf_sockopt deadlock fix). */
	.owner = THIS_MODULE,
};
|
|
|
|
|
2009-07-04 04:50:00 -06:00
|
|
|
/*
 * Matches built into ip_tables itself: only the "icmp" type/code match.
 * All other matches are provided by separate xt_* modules.
 */
static struct xt_match ipt_builtin_mt[] __read_mostly = {
	{
		.name = "icmp",
		.match = icmp_match,
		.matchsize = sizeof(struct ipt_icmp),
		/* Rejects rules with unknown invflags at load time. */
		.checkentry = icmp_checkentry,
		/* Restrict this match to ICMP packets only. */
		.proto = IPPROTO_ICMP,
		.family = NFPROTO_IPV4,
	},
};
|
|
|
|
|
2008-01-31 05:49:35 -07:00
|
|
|
/* Per-network-namespace setup: initialize IPv4 x_tables state for @net. */
static int __net_init ip_tables_net_init(struct net *net)
{
	return xt_proto_init(net, NFPROTO_IPV4);
}
|
|
|
|
|
|
|
|
/* Per-network-namespace teardown: release IPv4 x_tables state for @net. */
static void __net_exit ip_tables_net_exit(struct net *net)
{
	xt_proto_fini(net, NFPROTO_IPV4);
}
|
|
|
|
|
|
|
|
/* Registered with the pernet subsystem so every namespace, present and
 * future, gets its IPv4 x_tables state set up and torn down. */
static struct pernet_operations ip_tables_net_ops = {
	.init = ip_tables_net_init,
	.exit = ip_tables_net_exit,
};
|
|
|
|
|
2006-03-28 17:37:06 -07:00
|
|
|
/*
 * Module init: register the per-netns operations, the built-in targets
 * and matches, and finally the sockopt interface that userspace iptables
 * uses.  On any failure, already-registered pieces are unwound in reverse
 * order via the goto chain below.
 *
 * (Labels renamed from the historical non-consecutive err1/err2/err4/err5
 * to names describing the failed step; the stale "downing sem" comment,
 * which referred to a semaphore removed long ago, is dropped.)
 */
static int __init ip_tables_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_tables_net_ops);
	if (ret < 0)
		goto err_pernet;

	ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	if (ret < 0)
		goto err_targets;
	ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
	if (ret < 0)
		goto err_matches;

	/* Register the get/setsockopt interface for userspace iptables. */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0)
		goto err_sockopt;

	pr_info("(C) 2000-2006 Netfilter Core Team\n");
	return 0;

err_sockopt:
	xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
err_matches:
	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
err_targets:
	unregister_pernet_subsys(&ip_tables_net_ops);
err_pernet:
	return ret;
}
|
|
|
|
|
2006-03-28 17:37:06 -07:00
|
|
|
/*
 * Module exit: tear everything down in the exact reverse order of
 * ip_tables_init() — sockopts first (cutting off userspace), then the
 * built-in matches and targets, then the pernet operations.
 */
static void __exit ip_tables_fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);

	xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
	unregister_pernet_subsys(&ip_tables_net_ops);
}
|
|
|
|
|
|
|
|
/* Entry points used by the per-table modules (iptable_filter & friends). */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
module_exit(ip_tables_fini);
|