From 6b96686ecffcbea85dcb502e4584e4a20a2bfb29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Nov 2014 15:34:54 +0100 Subject: [PATCH 1/8] netfilter: nft_masq: fix uninitialized range in nft_masq_{ipv4, ipv6}_eval When transferring from the original range in nf_nat_masquerade_{ipv4,ipv6}() we copy over values from stack in from min_proto/max_proto due to uninitialized range variable in both, nft_masq_{ipv4,ipv6}_eval. As we only initialize flags at this time from nft_masq struct, just zero out the rest. Fixes: 9ba1f726bec09 ("netfilter: nf_tables: add new nft_masq expression") Signed-off-by: Daniel Borkmann Acked-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_masq_ipv4.c | 1 + net/ipv6/netfilter/nft_masq_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index c1023c445920..665de06561cd 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 8a7ac685076d..529c119cbb14 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); From 2196937e12b1b4ba139806d132647e1651d655df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Nov 2014 17:11:21 +0100 Subject: [PATCH 2/8] netfilter: ipset: small potential read beyond the end of buffer We could be reading 8 bytes into a 4 byte buffer here. 
It seems harmless but adding a check is the right thing to do and it silences a static checker warning. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 86f9d76b1464..d259da3ce67a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; From 50656d9df63d69ce399c8be62d4473b039dac36a Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Tue, 4 Nov 2014 16:37:40 -0800 Subject: [PATCH 3/8] ipvs: Keep skb->sk when allocating headroom on tunnel xmit ip_vs_prepare_tunneled_skb() ignores ->sk when allocating a new skb, either unconditionally setting ->sk to NULL or allowing the uninitialized ->sk from a newly allocated skb to leak through to the caller. This patch properly copies ->sk and increments its reference count. 
Signed-off-by: Calvin Owens Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 437a3663ad03..bd90bf8107da 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) goto error; + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } From 2daf1b4d18e3add229d1a3b5c554331d99ac6c7e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Nov 2014 18:48:33 +0100 Subject: [PATCH 4/8] netfilter: nft_compat: use current net namespace Instead of init_net when using xtables over nftables compat. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9d6d6f60a80f..b92f129beade 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -117,7 +117,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, struct xt_target *target, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: @@ -324,7 +324,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: From c918687f5e3962375a19de6ded3c1be85ebdbcd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 20:53:55 +0100 Subject: [PATCH 5/8] netfilter: nft_compat: relax chain type validation Check for nat chain dependency only, which is the one 
that can actually crash the kernel. Don't care if mangle, filter and security specific match and targets are used out of their scope, they are harmless. This restores iptables-compat with mangle specific match/target when used out of the OUTPUT chain, that are actually emulated through filter chains, which broke when performing strict validation. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b92f129beade..70dc96516305 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -21,45 +21,17 @@ #include #include -static const struct { - const char *name; - u8 type; -} table_to_chaintype[] = { - { "filter", NFT_CHAIN_T_DEFAULT }, - { "raw", NFT_CHAIN_T_DEFAULT }, - { "security", NFT_CHAIN_T_DEFAULT }, - { "mangle", NFT_CHAIN_T_ROUTE }, - { "nat", NFT_CHAIN_T_NAT }, - { }, -}; - -static int nft_compat_table_to_chaintype(const char *table) -{ - int i; - - for (i = 0; table_to_chaintype[i].name != NULL; i++) { - if (strcmp(table_to_chaintype[i].name, table) == 0) - return table_to_chaintype[i].type; - } - - return -1; -} - static int nft_compat_chain_validate_dependency(const char *tablename, const struct nft_chain *chain) { - enum nft_chain_type type; const struct nft_base_chain *basechain; if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) return 0; - type = nft_compat_table_to_chaintype(tablename); - if (type < 0) - return -EINVAL; - basechain = nft_base_chain(chain); - if (basechain->type->type != type) + if (strcmp(tablename, "nat") == 0 && + basechain->type->type != NFT_CHAIN_T_NAT) return -EINVAL; return 0; From afefb6f928ed42d5db452ee9251ce6de62673c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 19:08:21 +0100 Subject: [PATCH 6/8] netfilter: nft_compat: use the 
match->table to validate dependencies Instead of the match->name, which is of course not relevant. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 70dc96516305..265e190f2218 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -346,7 +346,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) goto err; @@ -420,7 +420,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (!(hook_mask & match->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(match->name, + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) return ret; From b326dd37b94e29bf6a15940f4fa66aa21a678ab1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 21:14:12 +0100 Subject: [PATCH 7/8] netfilter: nf_tables: restore synchronous object release from commit/abort The existing xtables matches and targets, when used from nft_compat, may sleep from the destroy path, ie. when removing rules. Since the objects are released via call_rcu from softirq context, this results in lockdep splats and possible lockups that may be hard to reproduce. Patrick also indicated that delayed object release via call_rcu can cause us problems in the ordering of event notifications when anonymous sets are in place. So, this patch restores the synchronous object release from the commit and abort paths. This includes a call to synchronize_rcu() to make sure that no packets are walking on the objects that are going to be released. 
This is slower though, but it's simple and it resolves the aforementioned problems. This is a partial revert of c7c32e7 ("netfilter: nf_tables: defer all object release via rcu") that was introduced in 3.16 to speed up interaction with userspace. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 845c596bf594..3ae969e3acf0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,14 +396,12 @@ struct nft_rule { /** * struct nft_trans - nf_tables object update in transaction * - * @rcu_head: rcu head to defer release of transaction data * @list: used internally * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { - struct rcu_head rcu_head; struct list_head list; int msg_type; struct nft_ctx ctx; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 11ab4b078f3b..66e8425dbfe7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -/* Schedule objects for release via rcu to make sure no packets are accesing - * removed rules. 
- */ -static void nf_tables_commit_release_rcu(struct rcu_head *rt) +static void nf_tables_commit_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + nf_tables_commit_release(trans); } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); @@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) return 0; } -/* Schedule objects for release via rcu to make sure no packets are accesing - * aborted rules. - */ -static void nf_tables_abort_release_rcu(struct rcu_head *rt) +static void nf_tables_abort_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + nf_tables_abort_release(trans); } return 0; From 5195c14c8b27cc0b18220ddbf0e5ad3328a04187 Mon Sep 17 00:00:00 2001 From: bill bonaparte Date: Thu, 6 Nov 2014 14:36:48 +0100 Subject: [PATCH 8/8] netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse After removal of the central spinlock nf_conntrack_lock, in commit 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock"), it is possible to race against get_next_corpse(). 
The race is against the get_next_corpse() cleanup on the "unconfirmed" list (a per-cpu list with separate locking), which sets the DYING bit. Fix this race, in __nf_conntrack_confirm(), by removing the CT from unconfirmed list before checking the DYING bit. In case race occurred, re-add the CT to the dying list. While at this, fix coding style of the comment that has been updated. Fixes: 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock") Reported-by: bill bonaparte Signed-off-by: bill bonaparte Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5016a6929085..2c699757bccf 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use of that particular connection -JM. 
+ */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */