1
0
Fork 0
remarkable-linux/net/netfilter/nfnetlink_log.c

1163 lines
28 KiB
C
Raw Normal View History

/*
* This is a module which is used for logging packets to userspace via
* nfetlink.
*
* (C) 2005 by Harald Welte <laforge@netfilter.org>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* Based on the old ipv4-only ipt_ULOG.c:
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_bridge.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/list.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 02:04:11 -06:00
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
/* max packet size is limited by 16-bit struct nfattr nfa_len field */
#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN)
#define PRINTR(x, args...) do { if (net_ratelimit()) \
printk(x, ## args); } while (0);
struct nfulnl_instance {
struct hlist_node hlist; /* global list of instances */
spinlock_t lock;
refcount_t use; /* use count */
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
unsigned int nlbufsiz; /* netlink buffer allocation size */
unsigned int qthreshold; /* threshold of the queue */
u_int32_t copy_range;
u_int32_t seq; /* instance-local sequential counter */
u_int16_t group_num; /* number of this queue */
u_int16_t flags;
u_int8_t copy_mode;
struct rcu_head rcu;
};
#define INSTANCE_BUCKETS 16
netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-16 18:58:21 -07:00
static unsigned int nfnl_log_net_id __read_mostly;
struct nfnl_log_net {
spinlock_t instances_lock;
struct hlist_head instance_table[INSTANCE_BUCKETS];
atomic_t global_seq;
};
static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
{
return net_generic(net, nfnl_log_net_id);
}
static inline u_int8_t instance_hashfn(u_int16_t group_num)
{
return ((group_num & 0xff) % INSTANCE_BUCKETS);
}
static struct nfulnl_instance *
__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
{
struct hlist_head *head;
struct nfulnl_instance *inst;
head = &log->instance_table[instance_hashfn(group_num)];
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 18:06:00 -07:00
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->group_num == group_num)
return inst;
}
return NULL;
}
static inline void
instance_get(struct nfulnl_instance *inst)
{
refcount_inc(&inst->use);
}
static struct nfulnl_instance *
instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
{
struct nfulnl_instance *inst;
rcu_read_lock_bh();
inst = __instance_lookup(log, group_num);
if (inst && !refcount_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
return inst;
}
static void nfulnl_instance_free_rcu(struct rcu_head *head)
{
struct nfulnl_instance *inst =
container_of(head, struct nfulnl_instance, rcu);
put_net(inst->net);
kfree(inst);
module_put(THIS_MODULE);
}
static void
instance_put(struct nfulnl_instance *inst)
{
if (inst && refcount_dec_and_test(&inst->use))
call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
}
static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
int err;
spin_lock_bh(&log->instances_lock);
if (__instance_lookup(log, group_num)) {
err = -EEXIST;
goto out_unlock;
}
inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
if (!inst) {
err = -ENOMEM;
goto out_unlock;
}
if (!try_module_get(THIS_MODULE)) {
kfree(inst);
err = -EAGAIN;
goto out_unlock;
}
INIT_HLIST_NODE(&inst->hlist);
spin_lock_init(&inst->lock);
/* needs to be two, since we _put() after creation */
refcount_set(&inst->use, 2);
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
inst->net = get_net(net);
inst->peer_user_ns = user_ns;
inst->peer_portid = portid;
inst->group_num = group_num;
inst->qthreshold = NFULNL_QTHRESH_DEFAULT;
inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT;
inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT;
inst->copy_mode = NFULNL_COPY_PACKET;
inst->copy_range = NFULNL_COPY_RANGE_MAX;
hlist_add_head_rcu(&inst->hlist,
&log->instance_table[instance_hashfn(group_num)]);
spin_unlock_bh(&log->instances_lock);
return inst;
out_unlock:
spin_unlock_bh(&log->instances_lock);
return ERR_PTR(err);
}
static void __nfulnl_flush(struct nfulnl_instance *inst);
/* called with BH disabled */
static void
__instance_destroy(struct nfulnl_instance *inst)
{
/* first pull it out of the global list */
hlist_del_rcu(&inst->hlist);
/* then flush all pending packets from skb */
spin_lock(&inst->lock);
/* lockless readers wont be able to use us */
inst->copy_mode = NFULNL_COPY_DISABLED;
if (inst->skb)
__nfulnl_flush(inst);
spin_unlock(&inst->lock);
/* and finally put the refcount */
instance_put(inst);
}
static inline void
instance_destroy(struct nfnl_log_net *log,
struct nfulnl_instance *inst)
{
spin_lock_bh(&log->instances_lock);
__instance_destroy(inst);
spin_unlock_bh(&log->instances_lock);
}
static int
nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
unsigned int range)
{
int status = 0;
spin_lock_bh(&inst->lock);
switch (mode) {
case NFULNL_COPY_NONE:
case NFULNL_COPY_META:
inst->copy_mode = mode;
inst->copy_range = 0;
break;
case NFULNL_COPY_PACKET:
inst->copy_mode = mode;
if (range == 0)
range = NFULNL_COPY_RANGE_MAX;
inst->copy_range = min_t(unsigned int,
range, NFULNL_COPY_RANGE_MAX);
break;
default:
status = -EINVAL;
break;
}
spin_unlock_bh(&inst->lock);
return status;
}
static int
nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
{
int status;
spin_lock_bh(&inst->lock);
if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT)
status = -ERANGE;
else if (nlbufsiz > 131072)
status = -ERANGE;
else {
inst->nlbufsiz = nlbufsiz;
status = 0;
}
spin_unlock_bh(&inst->lock);
return status;
}
static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
spin_lock_bh(&inst->lock);
inst->flushtimeout = timeout;
spin_unlock_bh(&inst->lock);
}
static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
spin_lock_bh(&inst->lock);
inst->qthreshold = qthresh;
spin_unlock_bh(&inst->lock);
}
static int
nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
{
spin_lock_bh(&inst->lock);
inst->flags = flags;
spin_unlock_bh(&inst->lock);
return 0;
}
static struct sk_buff *
nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
unsigned int pkt_size)
{
struct sk_buff *skb;
unsigned int n;
/* alloc skb which should be big enough for a whole multipart
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
skb = alloc_skb(pkt_size, GFP_ATOMIC);
}
}
return skb;
}
static void
__nfulnl_send(struct nfulnl_instance *inst)
{
if (inst->qlen > 1) {
struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
NLMSG_DONE,
sizeof(struct nfgenmsg),
0);
if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
inst->skb->len, skb_tailroom(inst->skb))) {
kfree_skb(inst->skb);
goto out;
}
}
nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
MSG_DONTWAIT);
out:
inst->qlen = 0;
inst->skb = NULL;
}
static void
__nfulnl_flush(struct nfulnl_instance *inst)
{
/* timer holds a reference */
if (del_timer(&inst->timer))
instance_put(inst);
if (inst->skb)
__nfulnl_send(inst);
}
static void
nfulnl_timer(unsigned long data)
{
struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
spin_lock_bh(&inst->lock);
if (inst->skb)
__nfulnl_send(inst);
spin_unlock_bh(&inst->lock);
instance_put(inst);
}
/* This is an inline function, we don't really care about a long
* list of arguments */
static inline int
__build_packet_message(struct nfnl_log_net *log,
struct nfulnl_instance *inst,
const struct sk_buff *skb,
unsigned int data_len,
u_int8_t pf,
unsigned int hooknum,
const struct net_device *indev,
const struct net_device *outdev,
const char *prefix, unsigned int plen,
const struct nfnl_ct_hook *nfnl_ct,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
nlh = nlmsg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
sizeof(struct nfgenmsg), 0);
if (!nlh)
return -1;
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = pf;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(inst->group_num);
memset(&pmsg, 0, sizeof(pmsg));
pmsg.hw_protocol = skb->protocol;
pmsg.hook = hooknum;
if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
goto nla_put_failure;
if (prefix &&
nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
goto nla_put_failure;
if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
#else
if (pf == PF_BRIDGE) {
/* Case 1: outdev is physical input device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(indev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by nf_hook_thresh or
* nf_log_packet.
*/
nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
struct net_device *physindev;
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
physindev = nf_bridge_get_physindev(skb);
if (physindev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
}
if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
#else
if (pf == PF_BRIDGE) {
/* Case 1: outdev is physical output device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(outdev->ifindex)) ||
/* this is the bridge group "brX" */
/* rcu_read_lock()ed by nf_hook_thresh or
* nf_log_packet.
*/
nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
struct net_device *physoutdev;
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
}
if (skb->mark &&
nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
goto nla_put_failure;
if (indev && skb->dev &&
skb->mac_header != skb->network_header) {
struct nfulnl_msg_packet_hw phw;
int len;
memset(&phw, 0, sizeof(phw));
len = dev_parse_header(skb, phw.hw_addr);
if (len > 0) {
phw.hw_addrlen = htons(len);
if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
goto nla_put_failure;
}
}
if (indev && skb_mac_header_was_set(skb)) {
if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
nla_put_be16(inst->skb, NFULA_HWLEN,
htons(skb->dev->hard_header_len)))
goto nla_put_failure;
hwhdrp = skb_mac_header(skb);
if (skb->dev->type == ARPHRD_SIT)
hwhdrp -= ETH_HLEN;
if (hwhdrp >= skb->head &&
nla_put(inst->skb, NFULA_HWHEADER,
skb->dev->hard_header_len, hwhdrp))
goto nla_put_failure;
}
if (skb->tstamp) {
struct nfulnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
}
/* UID */
sk = skb->sk;
if (sk && sk_fullsock(sk)) {
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
struct file *file = sk->sk_socket->file;
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace Pull user namespace changes from Eric Biederman: "This is a mostly modest set of changes to enable basic user namespace support. This allows the code to code to compile with user namespaces enabled and removes the assumption there is only the initial user namespace. Everything is converted except for the most complex of the filesystems: autofs4, 9p, afs, ceph, cifs, coda, fuse, gfs2, ncpfs, nfs, ocfs2 and xfs as those patches need a bit more review. The strategy is to push kuid_t and kgid_t values are far down into subsystems and filesystems as reasonable. Leaving the make_kuid and from_kuid operations to happen at the edge of userspace, as the values come off the disk, and as the values come in from the network. Letting compile type incompatible compile errors (present when user namespaces are enabled) guide me to find the issues. The most tricky areas have been the places where we had an implicit union of uid and gid values and were storing them in an unsigned int. Those places were converted into explicit unions. I made certain to handle those places with simple trivial patches. Out of that work I discovered we have generic interfaces for storing quota by projid. I had never heard of the project identifiers before. Adding full user namespace support for project identifiers accounts for most of the code size growth in my git tree. Ultimately there will be work to relax privlige checks from "capable(FOO)" to "ns_capable(user_ns, FOO)" where it is safe allowing root in a user names to do those things that today we only forbid to non-root users because it will confuse suid root applications. While I was pushing kuid_t and kgid_t changes deep into the audit code I made a few other cleanups. I capitalized on the fact we process netlink messages in the context of the message sender. I removed usage of NETLINK_CRED, and started directly using current->tty. Some of these patches have also made it into maintainer trees, with no problems from identical code from different trees showing up in linux-next. After reading through all of this code I feel like I might be able to win a game of kernel trivial pursuit." Fix up some fairly trivial conflicts in netfilter uid/git logging code. * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (107 commits) userns: Convert the ufs filesystem to use kuid/kgid where appropriate userns: Convert the udf filesystem to use kuid/kgid where appropriate userns: Convert ubifs to use kuid/kgid userns: Convert squashfs to use kuid/kgid where appropriate userns: Convert reiserfs to use kuid and kgid where appropriate userns: Convert jfs to use kuid/kgid where appropriate userns: Convert jffs2 to use kuid and kgid where appropriate userns: Convert hpfs to use kuid and kgid where appropriate userns: Convert btrfs to use kuid/kgid where appropriate userns: Convert bfs to use kuid/kgid where appropriate userns: Convert affs to use kuid/kgid wherwe appropriate userns: On alpha modify linux_to_osf_stat to use convert from kuids and kgids userns: On ia64 deal with current_uid and current_gid being kuid and kgid userns: On ppc convert current_uid from a kuid before printing. userns: Convert s390 getting uid and gid system calls to use kuid and kgid userns: Convert s390 hypfs to use kuid and kgid where appropriate userns: Convert binder ipc to use kuids userns: Teach security_path_chown to take kuids and kgids userns: Add user namespace support to IMA userns: Convert EVM to deal with kuids and kgids in it's hmac computation ...
2012-10-02 12:11:09 -06:00
const struct cred *cred = file->f_cred;
struct user_namespace *user_ns = inst->peer_user_ns;
__be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
__be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
read_unlock_bh(&sk->sk_callback_lock);
if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
nla_put_be32(inst->skb, NFULA_GID, gid))
goto nla_put_failure;
} else
read_unlock_bh(&sk->sk_callback_lock);
}
/* local sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ) &&
nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
goto nla_put_failure;
/* global sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
NFULA_CT, NFULA_CT_INFO) < 0)
goto nla_put_failure;
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
if (skb_tailroom(inst->skb) < nla_total_size(data_len))
goto nla_put_failure;
nla = skb_put(inst->skb, nla_total_size(data_len));
nla->nla_type = NFULA_PAYLOAD;
nla->nla_len = size;
if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
BUG();
}
nlh->nlmsg_len = inst->skb->tail - old_tail;
return 0;
nla_put_failure:
PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
return -1;
}
static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
.copy_len = 0xffff,
.group = 0,
.qthreshold = 1,
},
},
};
/* log handler for internal netfilter logging api */
void
nfulnl_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *li_user,
const char *prefix)
{
size_t size;
unsigned int data_len;
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
else
li = &default_loginfo;
inst = instance_lookup_get(log, li->u.ulog.group);
if (!inst)
return;
plen = 0;
if (prefix)
plen = strlen(prefix) + 1;
/* FIXME: do we want to make the size calculation conditional based on
* what is actually present? way more branches and checks, but more
* memory efficient... */
size = nlmsg_total_size(sizeof(struct nfgenmsg))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(u_int32_t)) /* uid */
+ nla_total_size(sizeof(u_int32_t)) /* gid */
+ nla_total_size(plen) /* prefix */
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
+ nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+ nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
if (in && skb_mac_header_was_set(skb)) {
size += nla_total_size(skb->dev->hard_header_len)
+ nla_total_size(sizeof(u_int16_t)) /* hwtype */
+ nla_total_size(sizeof(u_int16_t)); /* hwlen */
}
spin_lock_bh(&inst->lock);
if (inst->flags & NFULNL_CFG_F_SEQ)
size += nla_total_size(sizeof(u_int32_t));
if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
size += nla_total_size(sizeof(u_int32_t));
if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL) {
ct = nfnl_ct->get_ct(skb, &ctinfo);
if (ct != NULL)
size += nfnl_ct->build_size(ct);
}
}
qthreshold = inst->qthreshold;
/* per-rule qthreshold overrides per-instance */
if (li->u.ulog.qthreshold)
if (qthreshold > li->u.ulog.qthreshold)
qthreshold = li->u.ulog.qthreshold;
switch (inst->copy_mode) {
case NFULNL_COPY_META:
case NFULNL_COPY_NONE:
data_len = 0;
break;
case NFULNL_COPY_PACKET:
data_len = inst->copy_range;
if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
(li->u.ulog.copy_len < data_len))
data_len = li->u.ulog.copy_len;
if (data_len > skb->len)
data_len = skb->len;
size += nla_total_size(data_len);
break;
case NFULNL_COPY_DISABLED:
default:
goto unlock_and_release;
}
if (inst->skb && size > skb_tailroom(inst->skb)) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
__nfulnl_flush(inst);
}
if (!inst->skb) {
inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
inst->nlbufsiz, size);
if (!inst->skb)
goto alloc_failure;
}
inst->qlen++;
__build_packet_message(log, inst, skb, data_len, pf,
hooknum, in, out, prefix, plen,
nfnl_ct, ct, ctinfo);
if (inst->qlen >= qthreshold)
__nfulnl_flush(inst);
/* timer_pending always called within inst->lock, so there
* is no chance of a race here */
else if (!timer_pending(&inst->timer)) {
instance_get(inst);
inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
add_timer(&inst->timer);
}
unlock_and_release:
spin_unlock_bh(&inst->lock);
instance_put(inst);
return;
alloc_failure:
/* FIXME: statistics */
goto unlock_and_release;
}
EXPORT_SYMBOL_GPL(nfulnl_log_packet);
static int
nfulnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
struct nfnl_log_net *log = nfnl_log_pernet(n->net);
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
/* destroy all instances for this portid */
spin_lock_bh(&log->instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 18:06:00 -07:00
struct hlist_node *t2;
struct nfulnl_instance *inst;
struct hlist_head *head = &log->instance_table[i];
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 18:06:00 -07:00
hlist_for_each_entry_safe(inst, t2, head, hlist) {
if (n->portid == inst->peer_portid)
__instance_destroy(inst);
}
}
spin_unlock_bh(&log->instances_lock);
}
return NOTIFY_DONE;
}
static struct notifier_block nfulnl_rtnl_notifier = {
.notifier_call = nfulnl_rcv_nl_event,
};
static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nfqa[],
struct netlink_ext_ack *extack)
{
return -ENOTSUPP;
}
static struct nf_logger nfulnl_logger __read_mostly = {
.name = "nfnetlink_log",
.type = NF_LOG_TYPE_ULOG,
.logfn = nfulnl_log_packet,
.me = THIS_MODULE,
};
static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
[NFULA_CFG_CMD] = { .len = sizeof(struct nfulnl_msg_config_cmd) },
[NFULA_CFG_MODE] = { .len = sizeof(struct nfulnl_msg_config_mode) },
[NFULA_CFG_TIMEOUT] = { .type = NLA_U32 },
[NFULA_CFG_QTHRESH] = { .type = NLA_U32 },
[NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 },
[NFULA_CFG_FLAGS] = { .type = NLA_U16 },
};
static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nfula[],
struct netlink_ext_ack *extack)
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
u16 flags = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
cmd = nla_data(nfula[NFULA_CFG_CMD]);
/* Commands without queue context */
switch (cmd->command) {
case NFULNL_CFG_CMD_PF_BIND:
return nf_log_bind_pf(net, pf, &nfulnl_logger);
case NFULNL_CFG_CMD_PF_UNBIND:
nf_log_unbind_pf(net, pf);
return 0;
}
}
inst = instance_lookup_get(log, group_num);
if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto out_put;
}
/* Check if we support these flags in first place, dependencies should
* be there too not to break atomicity.
*/
if (nfula[NFULA_CFG_FLAGS]) {
flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));
if ((flags & NFULNL_CFG_F_CONNTRACK) &&
!rcu_access_pointer(nfnl_ct_hook)) {
#ifdef CONFIG_MODULES
nfnl_unlock(NFNL_SUBSYS_ULOG);
request_module("ip_conntrack_netlink");
nfnl_lock(NFNL_SUBSYS_ULOG);
if (rcu_access_pointer(nfnl_ct_hook)) {
ret = -EAGAIN;
goto out_put;
}
#endif
ret = -EOPNOTSUPP;
goto out_put;
}
}
if (cmd != NULL) {
switch (cmd->command) {
case NFULNL_CFG_CMD_BIND:
if (inst) {
ret = -EBUSY;
goto out_put;
}
inst = instance_create(net, group_num,
NETLINK_CB(skb).portid,
sk_user_ns(NETLINK_CB(skb).sk));
if (IS_ERR(inst)) {
ret = PTR_ERR(inst);
goto out;
}
break;
case NFULNL_CFG_CMD_UNBIND:
if (!inst) {
ret = -ENODEV;
goto out;
}
instance_destroy(log, inst);
goto out_put;
default:
ret = -ENOTSUPP;
goto out_put;
}
} else if (!inst) {
ret = -ENODEV;
goto out;
}
if (nfula[NFULA_CFG_MODE]) {
struct nfulnl_msg_config_mode *params =
nla_data(nfula[NFULA_CFG_MODE]);
nfulnl_set_mode(inst, params->copy_mode,
ntohl(params->copy_range));
}
if (nfula[NFULA_CFG_TIMEOUT]) {
__be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
nfulnl_set_timeout(inst, ntohl(timeout));
}
if (nfula[NFULA_CFG_NLBUFSIZ]) {
__be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
}
if (nfula[NFULA_CFG_QTHRESH]) {
__be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
nfulnl_set_qthresh(inst, ntohl(qthresh));
}
if (nfula[NFULA_CFG_FLAGS])
nfulnl_set_flags(inst, flags);
out_put:
instance_put(inst);
out:
return ret;
}
static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
[NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp,
.attr_count = NFULA_MAX, },
[NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config,
.attr_count = NFULA_CFG_MAX,
.policy = nfula_cfg_policy },
};
static const struct nfnetlink_subsystem nfulnl_subsys = {
.name = "log",
.subsys_id = NFNL_SUBSYS_ULOG,
.cb_count = NFULNL_MSG_MAX,
.cb = nfulnl_cb,
};
#ifdef CONFIG_PROC_FS
struct iter_state {
struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *get_first(struct net *net, struct iter_state *st)
{
struct nfnl_log_net *log;
if (!st)
return NULL;
log = nfnl_log_pernet(net);
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
struct hlist_head *head = &log->instance_table[st->bucket];
if (!hlist_empty(head))
return rcu_dereference_bh(hlist_first_rcu(head));
}
return NULL;
}
static struct hlist_node *get_next(struct net *net, struct iter_state *st,
struct hlist_node *h)
{
h = rcu_dereference_bh(hlist_next_rcu(h));
while (!h) {
struct nfnl_log_net *log;
struct hlist_head *head;
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
log = nfnl_log_pernet(net);
head = &log->instance_table[st->bucket];
h = rcu_dereference_bh(hlist_first_rcu(head));
}
return h;
}
static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
loff_t pos)
{
struct hlist_node *head;
head = get_first(net, st);
if (head)
while (pos && (head = get_next(net, st, head)))
pos--;
return pos ? NULL : head;
}
static void *seq_start(struct seq_file *s, loff_t *pos)
__acquires(rcu_bh)
{
rcu_read_lock_bh();
return get_idx(seq_file_net(s), s->private, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
return get_next(seq_file_net(s), s->private, v);
}
static void seq_stop(struct seq_file *s, void *v)
__releases(rcu_bh)
{
rcu_read_unlock_bh();
}
static int seq_show(struct seq_file *s, void *v)
{
const struct nfulnl_instance *inst = v;
seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
inst->flushtimeout, refcount_read(&inst->use));
return 0;
}
static const struct seq_operations nful_seq_ops = {
.start = seq_start,
.next = seq_next,
.stop = seq_stop,
.show = seq_show,
};
static int nful_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &nful_seq_ops,
sizeof(struct iter_state));
}
static const struct file_operations nful_file_ops = {
.owner = THIS_MODULE,
.open = nful_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
#endif /* PROC_FS */
static int __net_init nfnl_log_net_init(struct net *net)
{
unsigned int i;
struct nfnl_log_net *log = nfnl_log_pernet(net);
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc;
kuid_t root_uid;
kgid_t root_gid;
#endif
for (i = 0; i < INSTANCE_BUCKETS; i++)
INIT_HLIST_HEAD(&log->instance_table[i]);
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
proc = proc_create("nfnetlink_log", 0440,
net->nf.proc_netfilter, &nful_file_ops);
if (!proc)
return -ENOMEM;
root_uid = make_kuid(net->user_ns, 0);
root_gid = make_kgid(net->user_ns, 0);
if (uid_valid(root_uid) && gid_valid(root_gid))
proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
}
static void __net_exit nfnl_log_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
#endif
netfilter: nfnetlink_log: unset nf_loggers for netns when unloading module Steven Rostedt and Arnaldo Carvalho de Melo reported a panic when access the files /proc/sys/net/netfilter/nf_log/*. This problem will occur when we do: echo nfnetlink_log > /proc/sys/net/netfilter/nf_log/any_file rmmod nfnetlink_log and then access the files. Since the nf_loggers of netns hasn't been unset, it will point to the memory that has been freed. This bug is introduced by commit 9368a53c ("netfilter: nfnetlink_log: add net namespace support for nfnetlink_log"). [17261.822047] BUG: unable to handle kernel paging request at ffffffffa0d49090 [17261.822056] IP: [<ffffffff8157aba0>] nf_log_proc_dostring+0xf0/0x1d0 [...] [17261.822226] Call Trace: [17261.822235] [<ffffffff81297b98>] ? security_capable+0x18/0x20 [17261.822240] [<ffffffff8106fa09>] ? ns_capable+0x29/0x50 [17261.822247] [<ffffffff8163d25f>] ? net_ctl_permissions+0x1f/0x90 [17261.822254] [<ffffffff81216613>] proc_sys_call_handler+0xb3/0xc0 [17261.822258] [<ffffffff81216651>] proc_sys_read+0x11/0x20 [17261.822265] [<ffffffff811a80de>] vfs_read+0x9e/0x170 [17261.822270] [<ffffffff811a8c09>] SyS_read+0x49/0xa0 [17261.822276] [<ffffffff810e6496>] ? __audit_syscall_exit+0x1f6/0x2a0 [17261.822283] [<ffffffff81656e99>] system_call_fastpath+0x16/0x1b [17261.822285] Code: cc 81 4d 63 e4 4c 89 45 88 48 89 4d 90 e8 19 03 0d 00 4b 8b 84 e5 28 08 00 00 48 8b 4d 90 4c 8b 45 88 48 85 c0 0f 84 a8 00 00 00 <48> 8b 40 10 48 89 43 08 48 89 df 4c 89 f2 31 f6 e8 4b 35 af ff [17261.822329] RIP [<ffffffff8157aba0>] nf_log_proc_dostring+0xf0/0x1d0 [17261.822334] RSP <ffff880274d3fe28> [17261.822336] CR2: ffffffffa0d49090 [17261.822340] ---[ end trace a14ce54c0897a90d ]--- Reported-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Reported-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2013-12-15 23:59:22 -07:00
nf_log_unset(net, &nfulnl_logger);
}
static struct pernet_operations nfnl_log_net_ops = {
.init = nfnl_log_net_init,
.exit = nfnl_log_net_exit,
.id = &nfnl_log_net_id,
.size = sizeof(struct nfnl_log_net),
};
static int __init nfnetlink_log_init(void)
{
int status;
status = register_pernet_subsys(&nfnl_log_net_ops);
if (status < 0) {
pr_err("failed to register pernet ops\n");
goto out;
}
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
pr_err("failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
if (status < 0) {
pr_err("failed to register logger\n");
goto cleanup_subsys;
}
return status;
cleanup_subsys:
nfnetlink_subsys_unregister(&nfulnl_subsys);
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_log_net_ops);
out:
return status;
}
static void __exit nfnetlink_log_fini(void)
{
nfnetlink_subsys_unregister(&nfulnl_subsys);
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_log_net_ops);
nf_log_unregister(&nfulnl_logger);
}
MODULE_DESCRIPTION("netfilter userspace logging");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */
module_init(nfnetlink_log_init);
module_exit(nfnetlink_log_fini);