From d152159b89118841ebc0f7be2aadf79a22c6c501 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Jan 2019 15:45:58 +0100 Subject: [PATCH 01/33] netfilter: nf_tables: prepare nft_object for lookups via hashtable Add a 'key' structure for object, so we can look them up by name + table combination (the name can be the same in each table). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 19 ++++++++++++++----- net/netfilter/nf_tables_api.c | 27 ++++++++++++++++----------- net/netfilter/nft_objref.c | 2 +- net/netfilter/nft_quota.c | 2 +- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..325d0a6b808b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1011,12 +1011,22 @@ void nft_unregister_expr(struct nft_expr_type *); int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); +/** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ +struct nft_object_hash_key { + const char *name; + const struct nft_table *table; +}; + /** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @table: table this object belongs to - * @name: name of this stateful object + * @key: keys that identify this object * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle @@ -1025,8 +1035,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, */ struct nft_object { struct list_head list; - char *name; - struct nft_table *table; + struct nft_object_hash_key key; u32 genmask:2, use:30; u64 handle; @@ -1047,7 +1056,7 @@ struct nft_object *nft_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b0a93300dd7..5e213941e85b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3853,7 +3853,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nla_put_string(skb, NFTA_SET_ELEM_OBJREF, - (*nft_set_ext_obj(ext))->name) < 0) + (*nft_set_ext_obj(ext))->key.name) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && @@ -4826,7 +4826,7 @@ struct nft_object *nft_obj_lookup(const struct nft_table *table, struct nft_object *obj; list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nla_strcmp(nla, obj->name) && + if (!nla_strcmp(nla, obj->key.name) && objtype == obj->ops->type->type && nft_active_genmask(obj, genmask)) return obj; @@ -5014,11 +5014,11 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, err = PTR_ERR(obj); goto err1; } - obj->table = table; + obj->key.table = table; obj->handle = nf_tables_alloc_handle(table); - obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); - if (!obj->name) { + obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->key.name) { err = -ENOMEM; goto err2; } @@ -5031,7 +5031,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, table->use++; return 0; err3: - kfree(obj->name); + kfree(obj->key.name); err2: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); @@ -5060,7 +5060,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || - nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || @@ -5246,7 +5246,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) obj->ops->destroy(ctx, obj); module_put(obj->ops->type->owner); - kfree(obj->name); + kfree(obj->key.name); kfree(obj); } @@ -5297,7 +5297,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { @@ -6404,6 +6404,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) nf_tables_commit_chain_free_rules_old(g0); } +static void nft_obj_del(struct nft_object *obj) +{ + list_del_rcu(&obj->list); +} + static void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; @@ -6580,7 +6585,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELOBJ: - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; @@ -7330,7 +7335,7 @@ static void __nft_release_tables(struct net *net) nft_set_destroy(set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { - list_del(&obj->list); + nft_obj_del(obj); table->use--; nft_obj_destroy(&ctx, obj); } diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index a3185ca2a3a9..58eb75ad61bf 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -53,7 +53,7 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_object *obj = nft_objref_priv(expr); - if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->ops->type->type))) goto nla_put_failure; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0ed124a93fcf..354cde67bca9 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -61,7 +61,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) - nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0, + nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); } From 4d44175aa5bb5f68772b1eb0306554812294ca52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Jan 2019 15:45:59 +0100 Subject: [PATCH 02/33] netfilter: nf_tables: handle nft_object lookups via rhltable Instead of linear search, use rhlist interface to look up the objects. This fixes rulesets with thousands of named objects (quota, counters and the like). We only use a single table for this and consider the address of the table we're doing the lookup in as a part of the key. This reduces restore time of a sample ruleset with ~20k named counters from 37 seconds to 0.8 seconds. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 +- net/netfilter/nf_tables_api.c | 105 ++++++++++++++++++++++++++---- net/netfilter/nft_objref.c | 3 +- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 325d0a6b808b..45eba7d7ab38 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1027,14 +1027,16 @@ struct nft_object_hash_key { * * @list: table stateful object list node * @key: keys that identify this object + * @rhlhead: nft_objname_ht node * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @ops: object operations - * @data: object data, layout depends on type + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; + struct rhlist_head rhlhead; struct nft_object_hash_key key; u32 genmask:2, use:30; @@ -1052,7 +1054,8 @@ static inline void *nft_obj_data(const struct nft_object *obj) #define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) -struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e213941e85b..621ff834d3a4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -37,10 +37,16 @@ enum { NFT_VALIDATE_DO, }; +static struct rhltable nft_objname_ht; + static u32 nft_chain_hash(const void *data, u32 len, u32 seed); static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *); +static u32 nft_objname_hash(const void *data, u32 len, u32 seed); +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed); +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *); + static const struct rhashtable_params nft_chain_ht_params = { .head_offset = offsetof(struct nft_chain, rhlhead), .key_offset = offsetof(struct nft_chain, name), @@ -51,6 +57,15 @@ static const struct rhashtable_params nft_chain_ht_params = { .automatic_shrinking = true, }; +static const struct rhashtable_params nft_objname_ht_params = { + .head_offset = offsetof(struct nft_object, rhlhead), + .key_offset = offsetof(struct nft_object, key), + .hashfn = nft_objname_hash, + .obj_hashfn = nft_objname_hash_obj, + .obj_cmpfn = nft_objname_hash_cmp, + .automatic_shrinking = true, +}; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { switch (net->nft.validate_state) { @@ -814,6 +829,34 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(chain->name, name); } +static u32 nft_objname_hash(const void *data, u32 len, u32 seed) +{ + const struct nft_object_hash_key *k = data; + + seed ^= hash_ptr(k->table, 32); + + return jhash(k->name, strlen(k->name), seed); +} + +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_object *obj = data; + + return nft_objname_hash(&obj->key, 0, seed); +} + +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_object_hash_key *k = arg->key; + const struct nft_object *obj = ptr; + + if (obj->key.table != k->table) + return -1; + + return strcmp(obj->key.name, k->name); +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -1070,7 +1113,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) return ERR_PTR(-ENOENT); } -static bool lockdep_commit_lock_is_held(struct net *net) +static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING return lockdep_is_held(&net->nft.commit_mutex); @@ -4386,7 +4429,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EINVAL; goto err2; } - obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + obj = nft_obj_lookup(ctx->net, ctx->table, + nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -4819,18 +4863,36 @@ void nft_unregister_obj(struct nft_object_type *obj_type) } EXPORT_SYMBOL_GPL(nft_unregister_obj); -struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { + struct nft_object_hash_key k = { .table = table }; + char search[NFT_OBJ_MAXNAMELEN]; + struct rhlist_head *tmp, *list; struct nft_object *obj; - list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nla_strcmp(nla, obj->key.name) && - objtype == obj->ops->type->type && - nft_active_genmask(obj, genmask)) + nla_strlcpy(search, nla, sizeof(search)); + k.name = search; + + WARN_ON_ONCE(!rcu_read_lock_held() && + !lockdep_commit_lock_is_held(net)); + + rcu_read_lock(); + list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params); + if (!list) + goto out; + + rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) { + if (objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) { + rcu_read_unlock(); return obj; + } } +out: + rcu_read_unlock(); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_obj_lookup); @@ -4988,7 +5050,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err != -ENOENT) { @@ -5027,9 +5089,18 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (err < 0) goto err3; + err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, + nft_objname_ht_params); + if (err < 0) + goto err4; + list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; +err4: + /* queued in transaction log */ + INIT_LIST_HEAD(&obj->list); + return err; err3: kfree(obj->key.name); err2: @@ -5215,7 +5286,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return PTR_ERR(obj); @@ -5280,7 +5351,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); } else { attr = nla[NFTA_OBJ_NAME]; - obj = nft_obj_lookup(table, attr, objtype, genmask); + obj = nft_obj_lookup(net, table, attr, objtype, genmask); } if (IS_ERR(obj)) { @@ -6406,6 +6477,7 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) static void nft_obj_del(struct nft_object *obj) { + rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); list_del_rcu(&obj->list); } @@ -6721,7 +6793,7 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWOBJ: trans->ctx.table->use--; - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@ -7397,12 +7469,18 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err3; - /* must be last */ - err = nfnetlink_subsys_register(&nf_tables_subsys); + err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params); if (err < 0) goto err4; + /* must be last */ + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err5; + return err; +err5: + rhltable_destroy(&nft_objname_ht); err4: unregister_netdevice_notifier(&nf_tables_flowtable_notifier); err3: @@ -7422,6 +7500,7 @@ static void __exit nf_tables_module_exit(void) unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); + rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); } diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 58eb75ad61bf..c1f2adf198a0 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -38,7 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx, return -EINVAL; objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); - obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + obj = nft_obj_lookup(ctx->net, ctx->table, + tb[NFTA_OBJREF_IMM_NAME], objtype, genmask); if (IS_ERR(obj)) return -ENOENT; From 10870dd89e956d911d1a39474c0bf4a18c72cffc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Jan 2019 17:35:34 +0100 Subject: [PATCH 03/33] netfilter: nf_tables: add direct calls for all builtin expressions With CONFIG_RETPOLINE its faster to add an if (ptr == &foo_func) check and and use direct calls for all the built-in expressions. ~15% improvement in pathological cases. checkpatch doesn't like the X macro due to the embedded return statement, but the macro has a very limited scope so I don't think its a problem. I would like to avoid bugs of the form If (e->ops->eval == (unsigned long)nft_foo_eval) nft_bar_eval(); and open-coded if ()/else if()/else cascade, thus the macro. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 16 ++++++++++++++++ net/netfilter/nf_tables_core.c | 23 +++++++++++++++++------ net/netfilter/nft_bitwise.c | 5 ++--- net/netfilter/nft_byteorder.c | 6 +++--- net/netfilter/nft_cmp.c | 6 +++--- net/netfilter/nft_dynset.c | 5 ++--- net/netfilter/nft_immediate.c | 6 +++--- net/netfilter/nft_payload.c | 6 +++--- net/netfilter/nft_range.c | 5 ++--- net/netfilter/nft_rt.c | 6 +++--- 10 files changed, 54 insertions(+), 30 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 2046d104f323..7281895fa6d9 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -80,6 +80,22 @@ struct nft_regs; struct nft_pktinfo; void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index a50500232b0a..2a00aef7b6d4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -124,14 +124,25 @@ static void expr_call_ops_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_pktinfo *pkt) { +#ifdef CONFIG_RETPOLINE unsigned long e = (unsigned long)expr->ops->eval; +#define X(e, fun) \ + do { if ((e) == (unsigned long)(fun)) \ + return fun(expr, regs, pkt); } while (0) - if (e == (unsigned long)nft_meta_get_eval) - nft_meta_get_eval(expr, regs, pkt); - else if (e == (unsigned long)nft_lookup_eval) - nft_lookup_eval(expr, regs, pkt); - else - expr->ops->eval(expr, regs, pkt); + X(e, nft_payload_eval); + X(e, nft_cmp_eval); + X(e, nft_meta_get_eval); + X(e, nft_lookup_eval); + X(e, nft_range_eval); + X(e, nft_immediate_eval); + X(e, nft_byteorder_eval); + X(e, nft_dynset_eval); + X(e, nft_rt_get_eval); + X(e, nft_bitwise_eval); +#undef X +#endif /* CONFIG_RETPOLINE */ + expr->ops->eval(expr, regs, pkt); } unsigned int diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index fff8073e2a56..2c75b9e0474e 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -25,9 +25,8 @@ struct nft_bitwise { struct nft_data xor; }; -static void nft_bitwise_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); const u32 *src = ®s->data[priv->sreg]; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 13d4e421a6b3..19dbc34cc75e 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -26,9 +26,9 @@ struct nft_byteorder { u8 size; }; -static void nft_byteorder_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 79d48c1d06f4..f9f1fa66a16e 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -24,9 +24,9 @@ struct nft_cmp_expr { enum nft_cmp_ops op:8; }; -static void nft_cmp_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 07d4efd3d851..9658493d37d4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -62,9 +62,8 @@ err1: return NULL; } -static void nft_dynset_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0777a93211e2..3e5ed787b1d4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -17,9 +17,9 @@ #include #include -static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58..54e15de4b79a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -70,9 +70,9 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } -static void nft_payload_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index cedb96c3619f..529ac8acb19d 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -23,9 +23,8 @@ struct nft_range_expr { enum nft_range_ops op:8; }; -static void nft_range_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); int d1, d2; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index f35fa33913ae..c48daed5c46b 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -53,9 +53,9 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb return mtu - minlen; } -static void nft_rt_get_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_rt *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; From 827318feb69cb07ed58bb9b9dd6c2eaa81a116ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Jan 2019 17:19:34 +0100 Subject: [PATCH 04/33] netfilter: conntrack: remove helper hook again place them into the confirm one. Old: hook (300): ipv4/6_help() first call helper, then seqadj. hook (INT_MAX): confirm Now: hook (INT_MAX): confirm, first call helper, then seqadj, then confirm Not having the extra call is noticeable in bechmarks. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 142 ++++++++--------------------- 1 file changed, 36 insertions(+), 106 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 859f5d07a915..2bbc32d939e4 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -364,55 +364,55 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); -static unsigned int ipv4_helper(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) +static unsigned int nf_confirm(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; - const struct nf_conntrack_helper *helper; - - /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED_REPLY) - return NF_ACCEPT; help = nfct_help(ct); - if (!help) - return NF_ACCEPT; + if (help) { + const struct nf_conntrack_helper *helper; + int ret; - /* rcu_read_lock()ed by nf_hook_thresh */ - helper = rcu_dereference(help->helper); - if (!helper) - return NF_ACCEPT; + /* rcu_read_lock()ed by nf_hook_thresh */ + helper = rcu_dereference(help->helper); + if (helper) { + ret = helper->help(skb, + protoff, + ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + } - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } + + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb); } static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct nf_conn *ct; enum ip_conntrack_info ctinfo; + struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return nf_conntrack_confirm(skb); - /* adjust seqs for loopback traffic only in outgoing direction */ - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && - !nf_is_loopback_packet(skb)) { - if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return NF_DROP; - } - } -out: - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_confirm(skb, + skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } static unsigned int ipv4_conntrack_in(void *priv, @@ -460,24 +460,12 @@ static const struct nf_hook_ops ipv4_conntrack_ops[] = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, - { - .hook = ipv4_helper, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, { .hook = ipv4_confirm, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, - { - .hook = ipv4_helper, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, { .hook = ipv4_confirm, .pf = NFPROTO_IPV4, @@ -623,31 +611,21 @@ static unsigned int ipv6_confirm(void *priv, struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned char pnum = ipv6_hdr(skb)->nexthdr; - int protoff; __be16 frag_off; + int protoff; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return nf_conntrack_confirm(skb); protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); - goto out; + return nf_conntrack_confirm(skb); } - /* adjust seqs for loopback traffic only in outgoing direction */ - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && - !nf_is_loopback_packet(skb)) { - if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return NF_DROP; - } - } -out: - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_confirm(skb, protoff, ct, ctinfo); } static unsigned int ipv6_conntrack_in(void *priv, @@ -664,42 +642,6 @@ static unsigned int ipv6_conntrack_local(void *priv, return nf_conntrack_in(skb, state); } -static unsigned int ipv6_helper(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - const struct nf_conn_help *help; - const struct nf_conntrack_helper *helper; - enum ip_conntrack_info ctinfo; - __be16 frag_off; - int protoff; - u8 nexthdr; - - /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED_REPLY) - return NF_ACCEPT; - - help = nfct_help(ct); - if (!help) - return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_thresh */ - helper = rcu_dereference(help->helper); - if (!helper) - return NF_ACCEPT; - - nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, - &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { - pr_debug("proto header not found\n"); - return NF_ACCEPT; - } - - return helper->help(skb, protoff, ct, ctinfo); -} - static const struct nf_hook_ops ipv6_conntrack_ops[] = { { .hook = ipv6_conntrack_in, @@ -713,24 +655,12 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, - { - .hook = ipv6_helper, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_CONNTRACK_HELPER, - }, { .hook = ipv6_confirm, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, - { - .hook = ipv6_helper, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_CONNTRACK_HELPER, - }, { .hook = ipv6_confirm, .pf = NFPROTO_IPV6, From 8e2f311a68494a6677c1724bdcb10bada21af37c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2019 14:46:15 +0100 Subject: [PATCH 05/33] netfilter: physdev: relax br_netfilter dependency Following command: iptables -D FORWARD -m physdev ... causes connectivity loss in some setups. Reason is that iptables userspace will probe kernel for the module revision of the physdev patch, and physdev has an artificial dependency on br_netfilter (xt_physdev use makes no sense unless a br_netfilter module is loaded). This causes the "phydev" module to be loaded, which in turn enables the "call-iptables" infrastructure. bridged packets might then get dropped by the iptables ruleset. The better fix would be to change the "call-iptables" defaults to 0 and enforce explicit setting to 1, but that breaks backwards compatibility. This does the next best thing: add a request_module call to checkentry. This was a stray '-D ... -m physdev' won't activate br_netfilter anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 1 - net/bridge/br_netfilter_hooks.c | 5 ----- net/netfilter/xt_physdev.c | 9 +++++++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 4cd56808ac4e..89808ce293c4 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -43,7 +43,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) } struct net_device *setup_pre_routing(struct sk_buff *skb); -void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct net *net, struct sk_buff *skb); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c93c35bb73dd..40d058378b52 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -881,11 +881,6 @@ static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; -void br_netfilter_enable(void) -{ -} -EXPORT_SYMBOL_GPL(br_netfilter_enable); - /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static const struct nf_hook_ops br_nf_ops[] = { diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4034d70bff39..b2e39cb6a590 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -96,8 +96,7 @@ match_outdev: static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - - br_netfilter_enable(); + static bool brnf_probed __read_mostly; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -111,6 +110,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } + + if (!brnf_probed) { + brnf_probed = true; + request_module("br_netfilter"); + } + return 0; } From 75dd48e2e420a3cbbe56dd7adfcc6f142c948272 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 14 Jan 2019 18:41:35 +0100 Subject: [PATCH 06/33] netfilter: nf_tables: Support RULE_ID reference in new rule To allow for a batch to contain rules in arbitrary ordering, introduce NFTA_RULE_POSITION_ID attribute which works just like NFTA_RULE_POSITION but contains the ID of another rule within the same batch. This helps iptables-nft-restore handling dumps with mixed insert/append commands correctly. Note that NFTA_RULE_POSITION takes precedence over NFTA_RULE_POSITION_ID, so if the former is present, the latter is ignored. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7de4f1bdaf06..99ca95b830b6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -219,6 +219,7 @@ enum nft_chain_attributes { * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) + * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -231,6 +232,7 @@ enum nft_rule_attributes { NFTA_RULE_USERDATA, NFTA_RULE_PAD, NFTA_RULE_ID, + NFTA_RULE_POSITION_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 621ff834d3a4..d88c86c5b433 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2610,6 +2610,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) return 0; } +static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla); + #define NFT_RULE_MAXEXPRS 128 static int nf_tables_newrule(struct net *net, struct sock *nlsk, @@ -2679,6 +2682,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); } + } else if (nla[NFTA_RULE_POSITION_ID]) { + old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); + return PTR_ERR(old_rule); + } } } From a47c5404816254d6cc4787f335c4720066a538e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:32 +0100 Subject: [PATCH 07/33] netfilter: conntrack: handle builtin l4proto packet functions via direct calls The l4 protocol trackers are invoked via indirect call: l4proto->packet(). With one exception (gre), all l4trackers are builtin, so we can make .packet optional and use a direct call for most protocols. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 37 ++++++++++++++++ net/netfilter/nf_conntrack_core.c | 45 +++++++++++++++++++- net/netfilter/nf_conntrack_proto_dccp.c | 8 ++-- net/netfilter/nf_conntrack_proto_icmp.c | 12 ++---- net/netfilter/nf_conntrack_proto_icmpv6.c | 10 ++--- net/netfilter/nf_conntrack_proto_sctp.c | 12 +++--- net/netfilter/nf_conntrack_proto_tcp.c | 11 +++-- net/netfilter/nf_conntrack_proto_udp.c | 22 +++++----- 8 files changed, 113 insertions(+), 44 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ae7b86f587f2..6ca4355ae3dc 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -99,6 +99,43 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state); + +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_dccp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 741b533148ba..5dfd95ae510d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1522,6 +1522,45 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl, return ret; } +/* Returns verdict for packet, or -1 for invalid. */ +static int nf_conntrack_handle_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) +{ + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + return nf_conntrack_tcp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_UDP: + return nf_conntrack_udp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_ICMP: + return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); + case IPPROTO_ICMPV6: + return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_conntrack_udplite_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_conntrack_sctp_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_conntrack_dccp_packet(ct, skb, dataoff, + ctinfo, state); +#endif + } + + WARN_ON_ONCE(1); + return -NF_ACCEPT; +} + unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { @@ -1583,7 +1622,11 @@ repeat: goto out; } - ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); + if (l4proto->packet) + ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); + else + ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); + if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 023c1445bc39..84408ec80624 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -472,9 +472,10 @@ out_invalid: return true; } -static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; @@ -839,7 +840,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { .l4proto = IPPROTO_DCCP, - .packet = dccp_packet, .can_early_drop = dccp_can_early_drop, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index de64d8a5fdfd..381a7f3ea68a 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -68,11 +68,10 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Returns verdict for packet, or -1 for invalid. */ -static int icmp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic @@ -350,9 +349,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = .l4proto = IPPROTO_ICMP, .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, - .packet = icmp_packet, - .destroy = NULL, - .me = NULL, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmp_tuple_to_nlattr, .nlattr_tuple_size = icmp_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index a15eefb8e317..93f27d0bcb75 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -86,11 +86,10 @@ static unsigned int *icmpv6_get_timeouts(struct net *net) } /* Returns verdict for packet, or -1 for invalid. */ -static int icmpv6_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeout = nf_ct_timeout_lookup(ct); static const u8 valid_new[] = { @@ -361,7 +360,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .l4proto = IPPROTO_ICMPV6, .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, - .packet = icmpv6_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d53e3e78f605..952f2cc0fd74 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -357,11 +357,11 @@ out_invalid: } /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ -static int sctp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { enum sctp_conntrack new_state, old_state; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -759,9 +759,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, #endif - .packet = sctp_packet, .can_early_drop = sctp_can_early_drop, - .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_size = SCTP_NLATTR_SIZE, .to_nlattr = sctp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4dcbd51a8e97..32aac9a835d4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -829,11 +829,11 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, } /* Returns verdict for packet, or -1 for invalid. */ -static int tcp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); @@ -1535,7 +1535,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, #endif - .packet = tcp_packet, .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index b4f5d5e82031..e1f409aa0006 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -85,11 +85,11 @@ static bool udp_error(struct sk_buff *skb, } /* Returns verdict for packet, and may modify conntracktype */ -static int udp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeouts; @@ -177,11 +177,11 @@ static bool udplite_error(struct sk_buff *skb, } /* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeouts; @@ -319,7 +319,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = { .l4proto = IPPROTO_UDP, .allow_clash = true, - .packet = udp_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, @@ -344,7 +343,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .allow_clash = true, - .packet = udplite_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, From e2e48b471634faa6958718c27208d366b13fe878 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:33 +0100 Subject: [PATCH 08/33] netfilter: conntrack: handle icmp pkt_to_tuple helper via direct calls rather than handling them via indirect call, use a direct one instead. This leaves GRE as the last user of this indirect call facility. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 10 ++++++++++ net/netfilter/nf_conntrack_core.c | 6 ++++++ net/netfilter/nf_conntrack_proto_icmp.c | 5 ++--- net/netfilter/nf_conntrack_proto_icmpv6.c | 9 ++++----- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6ca4355ae3dc..46d554806eb3 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -90,6 +90,16 @@ struct nf_conntrack_l4proto { struct module *me; }; +bool icmp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5dfd95ae510d..b3840d36c3a6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -274,6 +274,12 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; + switch (protonum) { + case IPPROTO_ICMPV6: + return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); + case IPPROTO_ICMP: + return icmp_pkt_to_tuple(skb, dataoff, net, tuple); + } if (unlikely(l4proto->pkt_to_tuple)) return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 381a7f3ea68a..805c1fe5b837 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -25,8 +25,8 @@ static const unsigned int nf_ct_icmp_timeout = 30*HZ; -static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) +bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmphdr *hp; struct icmphdr _hdr; @@ -347,7 +347,6 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l4proto = IPPROTO_ICMP, - .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmp_tuple_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 93f27d0bcb75..20cd55e55e41 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -30,10 +30,10 @@ static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; -static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, - struct nf_conntrack_tuple *tuple) +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; struct icmp6hdr _hdr; @@ -358,7 +358,6 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l4proto = IPPROTO_ICMPV6, - .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, From 202e651cd43c69a43f75b445e90f55b59f9af0ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:34 +0100 Subject: [PATCH 09/33] netfilter: conntrack: gre: convert rwlock to rcu We can use gre. Lock is only needed when a new expectation is added. In case a single spinlock proves to be problematic we can either add one per netns or use an array of locks combined with net_hash_mix() or similar to pick the 'correct' one. But given this is only needed for an expectation rather than per packet a single one should be ok. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 1 + net/netfilter/nf_conntrack_proto_gre.c | 37 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 6989e2e4eabf..222c9d3d453f 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -19,6 +19,7 @@ struct nf_conn; struct nf_ct_gre_keymap { struct list_head list; struct nf_conntrack_tuple tuple; + struct rcu_head rcu; }; enum grep_conntrack { diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 8899b51aad44..34dd89485be2 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -49,6 +49,8 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = { }; static unsigned int proto_gre_net_id __read_mostly; +/* used when expectation is added */ +static DEFINE_SPINLOCK(keymap_lock); static inline struct netns_proto_gre *gre_pernet(struct net *net) { @@ -60,12 +62,12 @@ static void nf_ct_gre_keymap_flush(struct net *net) struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del(&km->list); - kfree(km); + list_del_rcu(&km->list); + kfree_rcu(km, rcu); } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, @@ -85,14 +87,12 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) struct nf_ct_gre_keymap *km; __be16 key = 0; - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); @@ -112,14 +112,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) { - read_unlock_bh(&net_gre->keymap_lock); + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *kmp) return 0; - } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; @@ -134,9 +130,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_add_tail(&km->list, &net_gre->keymap_list); - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); return 0; } @@ -145,24 +141,22 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", ct_pptp_info->keymap[dir]); - list_del(&ct_pptp_info->keymap[dir]->list); - kfree(ct_pptp_info->keymap[dir]); + list_del_rcu(&ct_pptp_info->keymap[dir]->list); + kfree_rcu(ct_pptp_info->keymap[dir], rcu); ct_pptp_info->keymap[dir] = NULL; } } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); @@ -365,7 +359,6 @@ static int gre_init_net(struct net *net) struct nf_proto_net *nf = &net_gre->nf; int i; - rwlock_init(&net_gre->keymap_lock); INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) net_gre->gre_timeouts[i] = gre_timeouts[i]; From 22fc4c4c9fd60427bcda00878cee94e7622cfa7a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:35 +0100 Subject: [PATCH 10/33] netfilter: conntrack: gre: switch module to be built-in This makes the last of the modular l4 trackers 'bool'. After this, all infrastructure to handle dynamic l4 protocol registration becomes obsolete and can be removed in followup patches. Old: 302824 net/netfilter/nf_conntrack.ko 21504 net/netfilter/nf_conntrack_proto_gre.ko New: 313728 net/netfilter/nf_conntrack.ko Old: text data bss dec hex filename 6281 1732 4 8017 1f51 nf_conntrack_proto_gre.ko 108356 20613 236 129205 1f8b5 nf_conntrack.ko New: 112095 21381 240 133716 20a54 nf_conntrack.ko The size increase is only temporary. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_proto_gre.h | 14 +-- .../net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/nf_conntrack_l4proto.h | 7 ++ include/net/netns/conntrack.h | 17 ++++ net/netfilter/Kconfig | 2 +- net/netfilter/Makefile | 3 +- net/netfilter/nf_conntrack_proto.c | 7 +- net/netfilter/nf_conntrack_proto_gre.c | 93 ++++--------------- net/netfilter/nfnetlink_cttimeout.c | 7 +- 9 files changed, 55 insertions(+), 98 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 222c9d3d453f..59714e9ee4ef 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -22,23 +22,11 @@ struct nf_ct_gre_keymap { struct rcu_head rcu; }; -enum grep_conntrack { - GRE_CT_UNREPLIED, - GRE_CT_REPLIED, - GRE_CT_MAX -}; - -struct netns_proto_gre { - struct nf_proto_net nf; - rwlock_t keymap_lock; - struct list_head keymap_list; - unsigned int gre_timeouts[GRE_CT_MAX]; -}; - /* add new tuple->key_reply pair to keymap */ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); +void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 135ee702c7b0..2c8c2b023848 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -22,5 +22,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #ifdef CONFIG_NF_CT_PROTO_UDPLITE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; +#endif #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 46d554806eb3..fded3f164dcc 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -239,4 +239,11 @@ static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) } #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +static inline struct nf_gre_net *nf_gre_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.gre; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 51cba0b8adf5..c72f413a2d4d 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -70,6 +70,20 @@ struct nf_sctp_net { }; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +enum gre_conntrack { + GRE_CT_UNREPLIED, + GRE_CT_REPLIED, + GRE_CT_MAX +}; + +struct nf_gre_net { + struct nf_proto_net nf; + struct list_head keymap_list; + unsigned int timeouts[GRE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -82,6 +96,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net gre; +#endif }; struct ct_pcpu { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index beb3a69ce1d4..fefd63a243f2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -174,7 +174,7 @@ config NF_CT_PROTO_DCCP If unsure, say Y. config NF_CT_PROTO_GRE - tristate + bool config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ae65a314d7a..e66067befa42 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,6 +13,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -25,8 +26,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o - # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2bbc32d939e4..e113bb2dc88d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -817,6 +817,9 @@ static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { #ifdef CONFIG_NF_CT_PROTO_UDPLITE &nf_conntrack_l4proto_udplite, #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + &nf_conntrack_l4proto_gre, +#endif #if IS_ENABLED(CONFIG_IPV6) &nf_conntrack_l4proto_icmpv6, #endif /* CONFIG_IPV6 */ @@ -897,9 +900,11 @@ void nf_conntrack_proto_pernet_fini(struct net *net) ARRAY_SIZE(builtin_l4proto)); pn->users--; nf_ct_l4proto_unregister_sysctl(pn); +#ifdef CONFIG_NF_CT_PROTO_GRE + nf_ct_gre_keymap_flush(net); +#endif } - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 34dd89485be2..68f9bfb79c4e 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -48,18 +48,17 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_REPLIED] = 180*HZ, }; -static unsigned int proto_gre_net_id __read_mostly; /* used when expectation is added */ static DEFINE_SPINLOCK(keymap_lock); -static inline struct netns_proto_gre *gre_pernet(struct net *net) +static inline struct nf_gre_net *gre_pernet(struct net *net) { - return net_generic(net, proto_gre_net_id); + return &net->ct.nf_ct_proto.gre; } -static void nf_ct_gre_keymap_flush(struct net *net) +void nf_ct_gre_keymap_flush(struct net *net) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; spin_lock_bh(&keymap_lock); @@ -83,7 +82,7 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, /* look up the source key for a given tuple */ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km; __be16 key = 0; @@ -105,7 +104,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; @@ -210,7 +209,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) static unsigned int *gre_get_timeouts(struct net *net) { - return gre_pernet(net)->gre_timeouts; + return gre_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntrack */ @@ -272,13 +271,13 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); if (!timeouts) timeouts = gre_get_timeouts(net); /* set default timeouts for GRE. */ - timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; - timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; + timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = @@ -332,10 +331,11 @@ static struct ctl_table gre_sysctl_table[] = { }; #endif -static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, - struct netns_proto_gre *net_gre) +static int gre_kmemdup_sysctl_table(struct net *net) { #ifdef CONFIG_SYSCTL + struct nf_gre_net *net_gre = gre_pernet(net); + struct nf_proto_net *nf = &net_gre->nf; int i; if (nf->ctl_table) @@ -348,26 +348,25 @@ static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, return -ENOMEM; for (i = 0; i < GRE_CT_MAX; i++) - nf->ctl_table[i].data = &net_gre->gre_timeouts[i]; + nf->ctl_table[i].data = &net_gre->timeouts[i]; #endif return 0; } static int gre_init_net(struct net *net) { - struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_proto_net *nf = &net_gre->nf; + struct nf_gre_net *net_gre = gre_pernet(net); int i; INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) - net_gre->gre_timeouts[i] = gre_timeouts[i]; + net_gre->timeouts[i] = gre_timeouts[i]; - return gre_kmemdup_sysctl_table(net, nf, net_gre); + return gre_kmemdup_sysctl_table(net); } /* protocol helper struct */ -static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, #ifdef CONFIG_NF_CONNTRACK_PROCFS @@ -391,61 +390,5 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .nla_policy = gre_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .net_id = &proto_gre_net_id, .init_net = gre_init_net, }; - -static int proto_gre_net_init(struct net *net) -{ - int ret = 0; - - ret = nf_ct_l4proto_pernet_register_one(net, - &nf_conntrack_l4proto_gre4); - if (ret < 0) - pr_err("nf_conntrack_gre4: pernet registration failed.\n"); - return ret; -} - -static void proto_gre_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations proto_gre_net_ops = { - .init = proto_gre_net_init, - .exit = proto_gre_net_exit, - .id = &proto_gre_net_id, - .size = sizeof(struct netns_proto_gre), -}; - -static int __init nf_ct_proto_gre_init(void) -{ - int ret; - - BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0); - - ret = register_pernet_subsys(&proto_gre_net_ops); - if (ret < 0) - goto out_pernet; - ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - - return 0; -out_gre4: - unregister_pernet_subsys(&proto_gre_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_ct_proto_gre_fini(void) -{ - nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); - unregister_pernet_subsys(&proto_gre_net_ops); -} - -module_init(nf_ct_proto_gre_init); -module_exit(nf_ct_proto_gre_fini); - -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 109b0d27345a..0e3e1a018206 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -474,12 +474,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, break; case IPPROTO_GRE: #ifdef CONFIG_NF_CT_PROTO_GRE - if (l4proto->net_id) { - struct netns_proto_gre *net_gre; - - net_gre = net_generic(net, *l4proto->net_id); - timeouts = net_gre->gre_timeouts; - } + timeouts = nf_gre_pernet(net)->timeouts; #endif break; case 255: From 751fc301ecbd0fc08d4d6c388f170e2081df26e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:36 +0100 Subject: [PATCH 11/33] netfilter: conntrack: remove net_id No users anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/netfilter/nf_conntrack_proto.c | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index fded3f164dcc..3585f8666fc0 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -79,7 +79,7 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif - unsigned int *net_id; + /* Init l4proto pernet data */ int (*init_net)(struct net *net); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index e113bb2dc88d..47b9294a86f7 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -163,13 +163,9 @@ static int kill_l4proto(struct nf_conn *i, void *data) static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, const struct nf_conntrack_l4proto *l4proto) { - if (l4proto->get_net_proto) { - /* statically built-in protocols use static per-net */ + if (l4proto->get_net_proto) return l4proto->get_net_proto(net); - } else if (l4proto->net_id) { - /* ... and loadable protocols use dynamic per-net */ - return net_generic(net, *l4proto->net_id); - } + return NULL; } From df5e1629087a45ca915fa0f69ea662175261855e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:37 +0100 Subject: [PATCH 12/33] netfilter: conntrack: remove pkt_to_tuple callback GRE is now builtin, so we can handle it via direct call and remove the callback. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_proto_gre.h | 2 ++ include/net/netfilter/nf_conntrack_l4proto.h | 5 ----- net/netfilter/nf_conntrack_core.c | 6 ++++-- net/netfilter/nf_conntrack_proto_generic.c | 11 ----------- net/netfilter/nf_conntrack_proto_gre.c | 5 ++--- 5 files changed, 8 insertions(+), 21 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 59714e9ee4ef..25f9a770fb84 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -30,5 +30,7 @@ void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple); #endif /* __KERNEL__ */ #endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3585f8666fc0..0d4b0398aeb9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,11 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Try to fill in the third arg: dataoff is offset past network protocol - hdr. Return true if possible. */ - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple); - /* Invert the per-proto part of the tuple: ie. turn xmit into reply. * Only used by icmp, most protocols use a generic version. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b3840d36c3a6..b71e271f2b44 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -279,9 +279,11 @@ nf_ct_get_tuple(const struct sk_buff *skb, return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); case IPPROTO_ICMP: return icmp_pkt_to_tuple(skb, dataoff, net, tuple); +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return gre_pkt_to_tuple(skb, dataoff, net, tuple); +#endif } - if (unlikely(l4proto->pkt_to_tuple)) - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); /* Actually only need first 4 bytes to get ports. */ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 5da19d5fbc76..5a5bf7cb6508 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -27,16 +27,6 @@ static bool nf_generic_should_process(u8 proto) } } -static bool generic_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) -{ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - - return true; -} - /* Returns verdict for packet, or -1 for invalid. */ static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -149,7 +139,6 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l4proto = 255, - .pkt_to_tuple = generic_pkt_to_tuple, .packet = generic_packet, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 68f9bfb79c4e..04bc982b274d 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -162,8 +162,8 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ /* gre hdr info to tuple */ -static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct pptp_gre_header *pgrehdr; struct pptp_gre_header _pgrehdr; @@ -368,7 +368,6 @@ static int gre_init_net(struct net *net) /* protocol helper struct */ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, - .pkt_to_tuple = gre_pkt_to_tuple, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif From 197c4300aec09b7cd2df124cea92f9f5a82efe42 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:38 +0100 Subject: [PATCH 13/33] netfilter: conntrack: remove invert_tuple callback Only used by icmp(v6). Prefer a direct call and remove this function from the l4proto struct. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 11 +++++------ net/netfilter/nf_conntrack_core.c | 8 ++++++-- net/netfilter/nf_conntrack_proto_icmp.c | 5 ++--- net/netfilter/nf_conntrack_proto_icmpv6.c | 5 ++--- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 0d4b0398aeb9..6cec8337e684 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,12 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Invert the per-proto part of the tuple: ie. turn xmit into reply. - * Only used by icmp, most protocols use a generic version. - */ - bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); - /* Returns verdict for packet, or -1 for invalid. */ int (*packet)(struct nf_conn *ct, struct sk_buff *skb, @@ -95,6 +89,11 @@ bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, struct net *net, struct nf_conntrack_tuple *tuple); +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); + int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b71e271f2b44..d56cb0fc82b6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -423,8 +423,12 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, inverse->dst.protonum = orig->dst.protonum; - if (unlikely(l4proto->invert_tuple)) - return l4proto->invert_tuple(inverse, orig); + switch (orig->dst.protonum) { + case IPPROTO_ICMP: + return nf_conntrack_invert_icmp_tuple(inverse, orig); + case IPPROTO_ICMPV6: + return nf_conntrack_invert_icmpv6_tuple(inverse, orig); + } inverse->src.u.all = orig->dst.u.all; inverse->dst.u.all = orig->src.u.all; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 805c1fe5b837..d28c1d7633b2 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -54,8 +54,8 @@ static const u_int8_t invmap[] = { [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 }; -static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { if (orig->dst.u.icmp.type >= sizeof(invmap) || !invmap[orig->dst.u.icmp.type]) @@ -347,7 +347,6 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l4proto = IPPROTO_ICMP, - .invert_tuple = icmp_invert_tuple, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmp_tuple_to_nlattr, .nlattr_tuple_size = icmp_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 20cd55e55e41..2910dcdea134 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -67,8 +67,8 @@ static const u_int8_t noct_valid_new[] = { [ICMPV6_MLD2_REPORT - 130] = 1 }; -static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) @@ -358,7 +358,6 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l4proto = IPPROTO_ICMPV6, - .invert_tuple = icmpv6_invert_tuple, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, From b184356d0a003ac5e82b4adf37f2d334df1d213c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:39 +0100 Subject: [PATCH 14/33] netfilter: conntrack: remove module owner field No need to get/put module owner reference, none of these can be removed anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ---- net/netfilter/nf_conntrack_proto.c | 8 -------- net/netfilter/nf_conntrack_proto_gre.c | 1 - net/netfilter/nfnetlink_cttimeout.c | 6 ------ net/netfilter/nft_ct.c | 2 -- 5 files changed, 21 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 6cec8337e684..fd8954429e87 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -74,9 +74,6 @@ struct nf_conntrack_l4proto { /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); - - /* Module (if any) which this is connected to. */ - struct module *me; }; bool icmp_pkt_to_tuple(const struct sk_buff *skb, @@ -148,7 +145,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); -void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ int nf_ct_l4proto_pernet_register_one(struct net *net, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 47b9294a86f7..3455f993cc68 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -139,20 +139,12 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num) rcu_read_lock(); p = __nf_ct_l4proto_find(l4num); - if (!try_module_get(p->me)) - p = &nf_conntrack_l4proto_generic; rcu_read_unlock(); return p; } EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); -void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); - static int kill_l4proto(struct nf_conn *i, void *data) { const struct nf_conntrack_l4proto *l4proto; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 04bc982b274d..0c26cfd5d29f 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -373,7 +373,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { #endif .packet = gre_packet, .destroy = gre_destroy, - .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 0e3e1a018206..37b4f84ac153 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -152,7 +152,6 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, err: kfree(timeout); err_proto_put: - nf_ct_l4proto_put(l4proto); return ret; } @@ -302,7 +301,6 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); - nf_ct_l4proto_put(timeout->timeout.l4proto); nf_ct_untimeout(net, &timeout->timeout); kfree_rcu(timeout, rcu_head); } else { @@ -372,10 +370,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, if (ret < 0) goto err; - nf_ct_l4proto_put(l4proto); return 0; err: - nf_ct_l4proto_put(l4proto); return ret; } @@ -511,7 +507,6 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, /* this avoids a loop in nfnetlink. */ return ret == -EAGAIN ? -ENOBUFS : ret; err: - nf_ct_l4proto_put(l4proto); return err; } @@ -592,7 +587,6 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { list_del_rcu(&cur->head); - nf_ct_l4proto_put(cur->timeout.l4proto); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 586627c361df..3249cc059048 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -902,7 +902,6 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, err_free_timeout: kfree(timeout); err_proto_put: - nf_ct_l4proto_put(l4proto); return ret; } @@ -913,7 +912,6 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nf_ct_timeout *timeout = priv->timeout; nf_ct_untimeout(ctx->net, timeout); - nf_ct_l4proto_put(timeout->l4proto); nf_ct_netns_put(ctx->net, ctx->family); kfree(priv->timeout); } From 44fb87f6351d04fafae2b17df5f46f1ce22c6f93 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:40 +0100 Subject: [PATCH 15/33] netfilter: conntrack: remove remaining l4proto indirect packet calls Now that all l4trackers are builtin, no need to use a mix of direct and indirect calls. This removes the last two users: gre and the generic l4 protocol tracker. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 12 +++---- net/netfilter/nf_conntrack_core.c | 26 +++++++++++---- net/netfilter/nf_conntrack_proto_generic.c | 35 -------------------- net/netfilter/nf_conntrack_proto_gre.c | 11 +++--- 4 files changed, 29 insertions(+), 55 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index fd8954429e87..dda028996559 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,13 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Returns verdict for packet, or -1 for invalid. */ - int (*packet)(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); - /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); @@ -136,6 +129,11 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d56cb0fc82b6..264074f04615 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1534,6 +1534,18 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl, return ret; } +static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, + enum ip_conntrack_info ctinfo) +{ + const unsigned int *timeout = nf_ct_timeout_lookup(ct); + + if (!timeout) + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; + + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); + return NF_ACCEPT; +} + /* Returns verdict for packet, or -1 for invalid. */ static int nf_conntrack_handle_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -1566,11 +1578,15 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, case IPPROTO_DCCP: return nf_conntrack_dccp_packet(ct, skb, dataoff, ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return nf_conntrack_gre_packet(ct, skb, dataoff, + ctinfo, state); #endif } - WARN_ON_ONCE(1); - return -NF_ACCEPT; + return generic_packet(ct, skb, ctinfo); } unsigned int @@ -1634,11 +1650,7 @@ repeat: goto out; } - if (l4proto->packet) - ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); - else - ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); - + ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 5a5bf7cb6508..78358d192c60 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -15,40 +15,6 @@ static const unsigned int nf_ct_generic_timeout = 600*HZ; -static bool nf_generic_should_process(u8 proto) -{ - switch (proto) { -#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE - case IPPROTO_GRE: - return false; -#endif - default: - return true; - } -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int generic_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - const unsigned int *timeout = nf_ct_timeout_lookup(ct); - - if (!nf_generic_should_process(nf_ct_protonum(ct))) { - pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", - nf_ct_protonum(ct)); - return -NF_ACCEPT; - } - - if (!timeout) - timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; - - nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); - return NF_ACCEPT; -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include @@ -139,7 +105,6 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net) const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l4proto = 255, - .packet = generic_packet, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = generic_timeout_nlattr_to_obj, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 0c26cfd5d29f..f6391991bcf6 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -213,11 +213,11 @@ static unsigned int *gre_get_timeouts(struct net *net) } /* Returns verdict for packet, and may modify conntrack */ -static int gre_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { if (state->pf != NFPROTO_IPV4) return -NF_ACCEPT; @@ -371,7 +371,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif - .packet = gre_packet, .destroy = gre_destroy, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, From edf0338dabf0f330873f7a9f84ea9f9ac431c0a8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:41 +0100 Subject: [PATCH 16/33] netfilter: conntrack: remove pernet l4 proto register interface No used anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 12 --------- net/netfilter/nf_conntrack_proto.c | 28 +++++++++----------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index dda028996559..87d9c198c05e 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -144,18 +144,6 @@ const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); -/* Protocol pernet registration. */ -int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_pernet_unregister_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_pernet_register(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); -void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); - /* Protocol global registration. */ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 3455f993cc68..13b782cc6a7a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -221,8 +221,8 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); -int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *l4proto) +static int nf_ct_l4proto_pernet_register_one(struct net *net, + const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -245,7 +245,6 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, out: return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) @@ -272,7 +271,7 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); -void nf_ct_l4proto_pernet_unregister_one(struct net *net, +static void nf_ct_l4proto_pernet_unregister_one(struct net *net, const struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); @@ -283,7 +282,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net, pn->users--; nf_ct_l4proto_unregister_sysctl(pn); } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); static void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], @@ -322,7 +320,15 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], return ret; } -int nf_ct_l4proto_pernet_register(struct net *net, +static void nf_ct_l4proto_pernet_unregister(struct net *net, + const struct nf_conntrack_l4proto *const l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); +} + +static int nf_ct_l4proto_pernet_register(struct net *net, const struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { @@ -341,16 +347,6 @@ int nf_ct_l4proto_pernet_register(struct net *net, } return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); - -void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const l4proto[], - unsigned int num_proto) -{ - while (num_proto-- != 0) - nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); static unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, From 303e0c5589592e4f623bfcaf4292a1ed816328ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:42 +0100 Subject: [PATCH 17/33] netfilter: conntrack: avoid unneeded nf_conntrack_l4proto lookups after removal of the packet and invert function pointers, several places do not need to lookup the l4proto structure anymore. Remove those lookups. The function nf_ct_invert_tuplepr becomes redundant, replace it with nf_ct_invert_tuple everywhere. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 - include/net/netfilter/nf_conntrack_core.h | 3 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 53 ++++------------------- net/netfilter/nf_conntrack_pptp.c | 2 +- net/netfilter/nf_conntrack_proto_icmp.c | 6 +-- net/netfilter/nf_conntrack_proto_icmpv6.c | 6 +-- net/netfilter/nf_nat_core.c | 12 ++--- net/openvswitch/conntrack.c | 2 +- 10 files changed, 22 insertions(+), 68 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 249d0a5b12b8..b5aac5ae5129 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -187,8 +187,6 @@ bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple); -bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index afc9b3620473..235c182022b2 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -39,8 +39,7 @@ void nf_conntrack_init_end(void); void nf_conntrack_cleanup_end(void); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto); + const struct nf_conntrack_tuple *orig); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 2687db015b6f..e26165af45cb 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -214,7 +214,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, } /* Change outer to look like the reply to an incoming packet */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 23022447eb49..9c914db44bec 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -225,7 +225,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, skb->len - hdrlen, 0)); } - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 264074f04615..728d2b5bdb1a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -229,8 +229,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, u_int16_t l3num, u_int8_t protonum, struct net *net, - struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto) + struct nf_conntrack_tuple *tuple) { unsigned int size; const __be32 *ap; @@ -374,33 +373,20 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_l4proto *l4proto; u8 protonum; int protoff; - int ret; - - rcu_read_lock(); protoff = get_l4proto(skb, nhoff, l3num, &protonum); - if (protoff <= 0) { - rcu_read_unlock(); + if (protoff <= 0) return false; - } - l4proto = __nf_ct_l4proto_find(protonum); - - ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, - l4proto); - - rcu_read_unlock(); - return ret; + return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *orig) { memset(inverse, 0, sizeof(*inverse)); @@ -1354,7 +1340,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@ -1367,7 +1352,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp; - if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { + if (!nf_ct_invert_tuple(&repl_tuple, tuple)) { pr_debug("Can't invert tuple.\n"); return NULL; } @@ -1449,7 +1434,6 @@ resolve_normal_ct(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t protonum, - const struct nf_conntrack_l4proto *l4proto, const struct nf_hook_state *state) { const struct nf_conntrack_zone *zone; @@ -1462,7 +1446,7 @@ resolve_normal_ct(struct nf_conn *tmpl, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, - &tuple, l4proto)) { + &tuple)) { pr_debug("Can't get tuple\n"); return 0; } @@ -1472,7 +1456,7 @@ resolve_normal_ct(struct nf_conn *tmpl, hash = hash_conntrack_raw(&tuple, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); if (!h) { - h = init_conntrack(state->net, tmpl, &tuple, l4proto, + h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); if (!h) return 0; @@ -1592,7 +1576,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { - const struct nf_conntrack_l4proto *l4proto; enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; @@ -1619,8 +1602,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) goto out; } - l4proto = __nf_ct_l4proto_find(protonum); - if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) { ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff, protonum, state); @@ -1634,7 +1615,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) } repeat: ret = resolve_normal_ct(tmpl, skb, dataoff, - protonum, l4proto, state); + protonum, state); if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(state->net, drop); @@ -1681,19 +1662,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_in); -bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig) -{ - bool ret; - - rcu_read_lock(); - ret = nf_ct_invert_tuple(inverse, orig, - __nf_ct_l4proto_find(orig->dst.protonum)); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); - /* Alter reply tuple (maybe alter helper). This is for NAT, and is implicitly racy: see __nf_conntrack_confirm */ void nf_conntrack_alter_reply(struct nf_conn *ct, @@ -1824,7 +1792,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { - const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; @@ -1845,10 +1812,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) if (dataoff <= 0) return -1; - l4proto = nf_ct_l4proto_find_get(l4num); - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, - l4num, net, &tuple, l4proto)) + l4num, net, &tuple)) return -1; if (ct->status & IPS_SRC_NAT) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 11562f2a08bb..976f1dcb97f0 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp_other; /* obviously this tuple inversion only works until you do NAT */ - nf_ct_invert_tuplepr(&inv_t, &exp->tuple); + nf_ct_invert_tuple(&inv_t, &exp->tuple); pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index d28c1d7633b2..1007efae741d 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -109,7 +109,6 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conntrack_tuple innertuple, origtuple; - const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; @@ -127,12 +126,9 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_thresh */ - innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum); - /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 2910dcdea134..6c93c091a8dd 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -130,7 +130,6 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; - const struct nf_conntrack_l4proto *inproto; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; @@ -146,12 +145,9 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_thresh */ - inproto = __nf_ct_l4proto_find(origtuple.dst.protonum); - /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { + if (!nf_ct_invert_tuple(&intuple, &origtuple)) { pr_debug("icmpv6_error: Can't invert tuple\n"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d159e9e7835b..44f97b3a215a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -158,7 +158,7 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, */ struct nf_conntrack_tuple reply; - nf_ct_invert_tuplepr(&reply, tuple); + nf_ct_invert_tuple(&reply, tuple); return nf_conntrack_tuple_taken(&reply, ignored_conntrack); } EXPORT_SYMBOL(nf_nat_used_tuple); @@ -253,7 +253,7 @@ find_appropriate_src(struct net *net, net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, + nf_ct_invert_tuple(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; @@ -560,8 +560,8 @@ nf_nat_setup_info(struct nf_conn *ct, * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - nf_ct_invert_tuplepr(&curr_tuple, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + nf_ct_invert_tuple(&curr_tuple, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); @@ -569,7 +569,7 @@ nf_nat_setup_info(struct nf_conn *ct, struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ - nf_ct_invert_tuplepr(&reply, &new_tuple); + nf_ct_invert_tuple(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ @@ -640,7 +640,7 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); if (!l3proto->manip_pkt(skb, 0, &target, mtype)) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cd94f925495a..35884f836260 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -622,7 +622,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, if (natted) { struct nf_conntrack_tuple inverse; - if (!nf_ct_invert_tuplepr(&inverse, &tuple)) { + if (!nf_ct_invert_tuple(&inverse, &tuple)) { pr_debug("ovs_ct_find_existing: Inversion failed!\n"); return NULL; } From b884fa46177659166ab6a9fd87640b9cd5f04fe7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:43 +0100 Subject: [PATCH 18/33] netfilter: conntrack: unify sysctl handling Due to historical reasons, all l4 trackers register their own sysctls. This leads to copy&pasted boilerplate code, that does exactly same thing, just with different data structure. Place all of this in a single file. This allows to remove the various ctl_table pointers from the ct_netns structure and reduces overall code size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 86 +---- net/netfilter/nf_conntrack_proto_generic.c | 30 +- net/netfilter/nf_conntrack_proto_gre.c | 42 +-- net/netfilter/nf_conntrack_proto_icmp.c | 29 +- net/netfilter/nf_conntrack_proto_icmpv6.c | 29 +- net/netfilter/nf_conntrack_proto_sctp.c | 89 +---- net/netfilter/nf_conntrack_proto_tcp.c | 116 +----- net/netfilter/nf_conntrack_proto_udp.c | 36 +- net/netfilter/nf_conntrack_standalone.c | 394 ++++++++++++++++++++- 9 files changed, 391 insertions(+), 460 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 84408ec80624..31ba88311bc4 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -724,90 +724,6 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -/* template, data assigned later */ -static struct ctl_table dccp_sysctl_table[] = { - { - .procname = "nf_conntrack_dccp_timeout_request", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_respond", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_partopen", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_open", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_closereq", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_closing", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_timewait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_loose", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, - struct nf_dccp_net *dn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(dccp_sysctl_table, - sizeof(dccp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; - pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; - pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; - pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; - pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; - pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; - pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; - pn->ctl_table[7].data = &dn->dccp_loose; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - pn->ctl_table[0].procname = NULL; -#endif - return 0; -} - static int dccp_init_net(struct net *net) { struct nf_dccp_net *dn = nf_dccp_pernet(net); @@ -830,7 +746,7 @@ static int dccp_init_net(struct net *net) dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; } - return dccp_kmemdup_sysctl_table(net, pn, dn); + return 0; } static struct nf_proto_net *dccp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 78358d192c60..0edbf82594d0 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -60,41 +60,13 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table generic_sysctl_table[] = { - { - .procname = "nf_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(generic_sysctl_table, - sizeof(generic_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &gn->timeout; -#endif - return 0; -} - static int generic_init_net(struct net *net) { struct nf_generic_net *gn = nf_generic_pernet(net); - struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - return generic_kmemdup_sysctl_table(pn, gn); + return 0; } static struct nf_proto_net *generic_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index f6391991bcf6..e573ec0fa12b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -313,46 +313,6 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table gre_sysctl_table[] = { - { - .procname = "nf_conntrack_gre_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_gre_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - {} -}; -#endif - -static int gre_kmemdup_sysctl_table(struct net *net) -{ -#ifdef CONFIG_SYSCTL - struct nf_gre_net *net_gre = gre_pernet(net); - struct nf_proto_net *nf = &net_gre->nf; - int i; - - if (nf->ctl_table) - return 0; - - nf->ctl_table = kmemdup(gre_sysctl_table, - sizeof(gre_sysctl_table), - GFP_KERNEL); - if (!nf->ctl_table) - return -ENOMEM; - - for (i = 0; i < GRE_CT_MAX; i++) - nf->ctl_table[i].data = &net_gre->timeouts[i]; -#endif - return 0; -} - static int gre_init_net(struct net *net) { struct nf_gre_net *net_gre = gre_pernet(net); @@ -362,7 +322,7 @@ static int gre_init_net(struct net *net) for (i = 0; i < GRE_CT_MAX; i++) net_gre->timeouts[i] = gre_timeouts[i]; - return gre_kmemdup_sysctl_table(net); + return 0; } /* protocol helper struct */ diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 1007efae741d..eb77f747759f 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -298,41 +298,14 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table icmp_sysctl_table[] = { - { - .procname = "nf_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(icmp_sysctl_table, - sizeof(icmp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &in->timeout; -#endif - return 0; -} static int icmp_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmp_pernet(net); - struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - return icmp_kmemdup_sysctl_table(pn, in); + return 0; } static struct nf_proto_net *icmp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 6c93c091a8dd..d243ef8a128e 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -309,41 +309,14 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table icmpv6_sysctl_table[] = { - { - .procname = "nf_conntrack_icmpv6_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(icmpv6_sysctl_table, - sizeof(icmpv6_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &in->timeout; -#endif - return 0; -} static int icmpv6_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmpv6_pernet(net); - struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmpv6_timeout; - return icmpv6_kmemdup_sysctl_table(pn, in); + return 0; } static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 952f2cc0fd74..31130f218d8f 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -642,93 +642,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -#ifdef CONFIG_SYSCTL -static struct ctl_table sctp_sysctl_table[] = { - { - .procname = "nf_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif - -static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(sctp_sysctl_table, - sizeof(sctp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; - pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; - pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; -#endif - return 0; -} - static int sctp_init_net(struct net *net) { struct nf_sctp_net *sn = nf_sctp_pernet(net); @@ -746,7 +659,7 @@ static int sctp_init_net(struct net *net) sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; } - return sctp_kmemdup_sysctl_table(pn, sn); + return 0; } static struct nf_proto_net *sctp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 32aac9a835d4..422bdedac0ed 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1387,120 +1387,6 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table tcp_sysctl_table[] = { - { - .procname = "nf_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_unacknowledged", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nf_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nf_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(tcp_sysctl_table, - sizeof(tcp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; - pn->ctl_table[10].data = &tn->tcp_loose; - pn->ctl_table[11].data = &tn->tcp_be_liberal; - pn->ctl_table[12].data = &tn->tcp_max_retrans; -#endif - return 0; -} - static int tcp_init_net(struct net *net) { struct nf_tcp_net *tn = nf_tcp_pernet(net); @@ -1521,7 +1407,7 @@ static int tcp_init_net(struct net *net) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - return tcp_kmemdup_sysctl_table(pn, tn); + return 0; } static struct nf_proto_net *tcp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index e1f409aa0006..6e81e79844d7 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -260,40 +260,6 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table udp_sysctl_table[] = { - { - .procname = "nf_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - pn->ctl_table = kmemdup(udp_sysctl_table, - sizeof(udp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif - return 0; -} static int udp_init_net(struct net *net) { @@ -307,7 +273,7 @@ static int udp_init_net(struct net *net) un->timeouts[i] = udp_timeouts[i]; } - return udp_kmemdup_sysctl_table(pn, un); + return 0; } static struct nf_proto_net *udp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b6177fd73304..d848de713dc0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -547,8 +547,55 @@ enum nf_ct_sysctl_index { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_SYSCTL_CT_TIMESTAMP, #endif + NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, + NF_SYSCTL_CT_PROTO_TCP_LOOSE, + NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, + NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, + NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, + NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, + NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, +#ifdef CONFIG_NF_CT_PROTO_SCTP + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT, + NF_SYSCTL_CT_PROTO_DCCP_LOOSE, +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, + NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, +#endif + + __NF_SYSCTL_CT_LAST_SYSCTL, }; +#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1) + static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", @@ -626,7 +673,235 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, #endif - { } + [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { + .procname = "nf_conntrack_generic_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = { + .procname = "nf_conntrack_tcp_timeout_syn_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = { + .procname = "nf_conntrack_tcp_timeout_syn_recv", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = { + .procname = "nf_conntrack_tcp_timeout_established", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_fin_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_close_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = { + .procname = "nf_conntrack_tcp_timeout_last_ack", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_time_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = { + .procname = "nf_conntrack_tcp_timeout_close", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = { + .procname = "nf_conntrack_tcp_timeout_max_retrans", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { + .procname = "nf_conntrack_tcp_loose", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { + .procname = "nf_conntrack_tcp_be_liberal", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { + .procname = "nf_conntrack_tcp_max_retrans", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = { + .procname = "nf_conntrack_udp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = { + .procname = "nf_conntrack_udp_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { + .procname = "nf_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = { + .procname = "nf_conntrack_icmpv6_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#ifdef CONFIG_NF_CT_PROTO_SCTP + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = { + .procname = "nf_conntrack_sctp_timeout_closed", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = { + .procname = "nf_conntrack_sctp_timeout_cookie_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = { + .procname = "nf_conntrack_sctp_timeout_cookie_echoed", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = { + .procname = "nf_conntrack_sctp_timeout_established", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_recd", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = { + .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { + .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { + .procname = "nf_conntrack_dccp_timeout_request", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = { + .procname = "nf_conntrack_dccp_timeout_respond", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = { + .procname = "nf_conntrack_dccp_timeout_partopen", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = { + .procname = "nf_conntrack_dccp_timeout_open", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = { + .procname = "nf_conntrack_dccp_timeout_closereq", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = { + .procname = "nf_conntrack_dccp_timeout_closing", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = { + .procname = "nf_conntrack_dccp_timeout_timewait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = { + .procname = "nf_conntrack_dccp_loose", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = { + .procname = "nf_conntrack_gre_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = { + .procname = "nf_conntrack_gre_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#endif + {} }; static struct ctl_table nf_ct_netfilter_table[] = { @@ -640,14 +915,103 @@ static struct ctl_table nf_ct_netfilter_table[] = { { } }; +static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, + struct ctl_table *table) +{ + struct nf_tcp_net *tn = nf_tcp_pernet(net); + +#define XASSIGN(XNAME, tn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \ + &(tn)->timeouts[TCP_CONNTRACK_ ## XNAME] + + XASSIGN(SYN_SENT, tn); + XASSIGN(SYN_RECV, tn); + XASSIGN(ESTABLISHED, tn); + XASSIGN(FIN_WAIT, tn); + XASSIGN(CLOSE_WAIT, tn); + XASSIGN(LAST_ACK, tn); + XASSIGN(TIME_WAIT, tn); + XASSIGN(CLOSE, tn); + XASSIGN(RETRANS, tn); + XASSIGN(UNACK, tn); +#undef XASSIGN +#define XASSIGN(XNAME, rval) \ + table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval) + + XASSIGN(LOOSE, &tn->tcp_loose); + XASSIGN(LIBERAL, &tn->tcp_be_liberal); + XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); +#undef XASSIGN +} + +static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net *sn = nf_sctp_pernet(net); + +#define XASSIGN(XNAME, sn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \ + &(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME] + + XASSIGN(CLOSED, sn); + XASSIGN(COOKIE_WAIT, sn); + XASSIGN(COOKIE_ECHOED, sn); + XASSIGN(ESTABLISHED, sn); + XASSIGN(SHUTDOWN_SENT, sn); + XASSIGN(SHUTDOWN_RECD, sn); + XASSIGN(SHUTDOWN_ACK_SENT, sn); + XASSIGN(HEARTBEAT_SENT, sn); + XASSIGN(HEARTBEAT_ACKED, sn); +#undef XASSIGN +#endif +} + +static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net *dn = nf_dccp_pernet(net); + +#define XASSIGN(XNAME, dn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \ + &(dn)->dccp_timeout[CT_DCCP_ ## XNAME] + + XASSIGN(REQUEST, dn); + XASSIGN(RESPOND, dn); + XASSIGN(PARTOPEN, dn); + XASSIGN(OPEN, dn); + XASSIGN(CLOSEREQ, dn); + XASSIGN(CLOSING, dn); + XASSIGN(TIMEWAIT, dn); +#undef XASSIGN + + table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose; +#endif +} + +static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net *gn = nf_gre_pernet(net); + + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED]; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED]; +#endif +} + static int nf_conntrack_standalone_init_sysctl(struct net *net) { + struct nf_udp_net *un = nf_udp_pernet(net); struct ctl_table *table; + BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL); + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) - goto out_kmemdup; + return -ENOMEM; table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; @@ -655,6 +1019,16 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED]; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; + + nf_conntrack_standalone_init_tcp_sysctl(net, table); + nf_conntrack_standalone_init_sctp_sysctl(net, table); + nf_conntrack_standalone_init_dccp_sysctl(net, table); + nf_conntrack_standalone_init_gre_sysctl(net, table); /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { @@ -680,7 +1054,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); -out_kmemdup: return -ENOMEM; } @@ -707,27 +1080,26 @@ static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init_net(net); + net->ct.sysctl_checksum = 1; + + ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) - goto out_init; + return ret; ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; - net->ct.sysctl_checksum = 1; - net->ct.sysctl_log_invalid = 0; - ret = nf_conntrack_standalone_init_sysctl(net); + ret = nf_conntrack_init_net(net); if (ret < 0) - goto out_sysctl; + goto out_init_net; return 0; -out_sysctl: +out_init_net: nf_conntrack_standalone_fini_proc(net); out_proc: nf_conntrack_cleanup_net(net); -out_init: return ret; } From 70aed4647cbd63e3371386ddfaeb195413f38d99 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:44 +0100 Subject: [PATCH 19/33] netfilter: conntrack: remove sysctl registration helpers After previous patch these are not used anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 4 -- net/netfilter/nf_conntrack_proto.c | 77 +----------------------------- 2 files changed, 1 insertion(+), 80 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c72f413a2d4d..b8faa72e0f5b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -19,10 +19,6 @@ struct ctl_table_header; struct nf_conntrack_ecache; struct nf_proto_net { -#ifdef CONFIG_SYSCTL - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif unsigned int users; }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 13b782cc6a7a..c8f024d731c8 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -48,35 +48,6 @@ static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __re static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL -static int -nf_ct_register_sysctl(struct net *net, - struct ctl_table_header **header, - const char *path, - struct ctl_table *table) -{ - if (*header == NULL) { - *header = register_net_sysctl(net, path, table); - if (*header == NULL) - return -ENOMEM; - } - - return 0; -} - -static void -nf_ct_unregister_sysctl(struct ctl_table_header **header, - struct ctl_table **table, - unsigned int users) -{ - if (users > 0) - return; - - unregister_net_sysctl_table(*header); - kfree(*table); - *header = NULL; - *table = NULL; -} - __printf(5, 6) void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, @@ -161,40 +132,6 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, return NULL; } -static -int nf_ct_l4proto_register_sysctl(struct net *net, - struct nf_proto_net *pn) -{ - int err = 0; - -#ifdef CONFIG_SYSCTL - if (pn->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &pn->ctl_table_header, - "net/netfilter", - pn->ctl_table); - if (err < 0) { - if (!pn->users) { - kfree(pn->ctl_table); - pn->ctl_table = NULL; - } - } - } -#endif /* CONFIG_SYSCTL */ - return err; -} - -static -void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); -#endif /* CONFIG_SYSCTL */ -} - /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) @@ -237,10 +174,6 @@ static int nf_ct_l4proto_pernet_register_one(struct net *net, if (pn == NULL) goto out; - ret = nf_ct_l4proto_register_sysctl(net, pn); - if (ret < 0) - goto out; - pn->users++; out: return ret; @@ -280,7 +213,6 @@ static void nf_ct_l4proto_pernet_unregister_one(struct net *net, return; pn->users--; - nf_ct_l4proto_unregister_sysctl(pn); } static void @@ -859,17 +791,11 @@ int nf_conntrack_proto_pernet_init(struct net *net) err = nf_conntrack_l4proto_generic.init_net(net); if (err < 0) return err; - err = nf_ct_l4proto_register_sysctl(net, - pn); - if (err < 0) - return err; err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); - if (err < 0) { - nf_ct_l4proto_unregister_sysctl(pn); + if (err < 0) return err; - } pn->users++; return 0; @@ -883,7 +809,6 @@ void nf_conntrack_proto_pernet_fini(struct net *net) nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, ARRAY_SIZE(builtin_l4proto)); pn->users--; - nf_ct_l4proto_unregister_sysctl(pn); #ifdef CONFIG_NF_CT_PROTO_GRE nf_ct_gre_keymap_flush(net); #endif From 2a389de86e4a5d0bd3abed9e6fee27050652d339 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:45 +0100 Subject: [PATCH 20/33] netfilter: conntrack: remove l4proto init and get_net callbacks Those were needed we still had modular trackers. As we don't have those anymore, prefer direct calls and remove all the (un)register infrastructure associated with this. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 15 +-- include/net/netns/conntrack.h | 11 -- net/netfilter/nf_conntrack_proto.c | 103 +++---------------- net/netfilter/nf_conntrack_proto_dccp.c | 40 +++---- net/netfilter/nf_conntrack_proto_generic.c | 11 +- net/netfilter/nf_conntrack_proto_gre.c | 5 +- net/netfilter/nf_conntrack_proto_icmp.c | 12 +-- net/netfilter/nf_conntrack_proto_icmpv6.c | 12 +-- net/netfilter/nf_conntrack_proto_sctp.c | 29 ++---- net/netfilter/nf_conntrack_proto_tcp.c | 35 ++----- net/netfilter/nf_conntrack_proto_udp.c | 24 +---- 11 files changed, 64 insertions(+), 233 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 87d9c198c05e..d5909e51ca92 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,12 +61,6 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif - - /* Init l4proto pernet data */ - int (*init_net)(struct net *net); - - /* Return the per-net protocol part. */ - struct nf_proto_net *(*get_net_proto)(struct net *net); }; bool icmp_pkt_to_tuple(const struct sk_buff *skb, @@ -135,6 +129,15 @@ int nf_conntrack_gre_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); +void nf_conntrack_generic_init_net(struct net *net); +void nf_conntrack_tcp_init_net(struct net *net); +void nf_conntrack_udp_init_net(struct net *net); +void nf_conntrack_gre_init_net(struct net *net); +void nf_conntrack_dccp_init_net(struct net *net); +void nf_conntrack_sctp_init_net(struct net *net); +void nf_conntrack_icmp_init_net(struct net *net); +void nf_conntrack_icmpv6_init_net(struct net *net); + /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b8faa72e0f5b..f19b53130bf7 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,17 +18,11 @@ struct ctl_table_header; struct nf_conntrack_ecache; -struct nf_proto_net { - unsigned int users; -}; - struct nf_generic_net { - struct nf_proto_net pn; unsigned int timeout; }; struct nf_tcp_net { - struct nf_proto_net pn; unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; unsigned int tcp_loose; unsigned int tcp_be_liberal; @@ -42,18 +36,15 @@ enum udp_conntrack { }; struct nf_udp_net { - struct nf_proto_net pn; unsigned int timeouts[UDP_CT_MAX]; }; struct nf_icmp_net { - struct nf_proto_net pn; unsigned int timeout; }; #ifdef CONFIG_NF_CT_PROTO_DCCP struct nf_dccp_net { - struct nf_proto_net pn; int dccp_loose; unsigned int dccp_timeout[CT_DCCP_MAX + 1]; }; @@ -61,7 +52,6 @@ struct nf_dccp_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net { - struct nf_proto_net pn; unsigned int timeouts[SCTP_CONNTRACK_MAX]; }; #endif @@ -74,7 +64,6 @@ enum gre_conntrack { }; struct nf_gre_net { - struct nf_proto_net nf; struct list_head keymap_list; unsigned int timeouts[GRE_CT_MAX]; }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c8f024d731c8..e6bc02c13f0f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -123,15 +123,6 @@ static int kill_l4proto(struct nf_conn *i, void *data) return nf_ct_protonum(i) == l4proto->l4proto; } -static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, - const struct nf_conntrack_l4proto *l4proto) -{ - if (l4proto->get_net_proto) - return l4proto->get_net_proto(net); - - return NULL; -} - /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) @@ -158,27 +149,6 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); -static int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *l4proto) -{ - int ret = 0; - struct nf_proto_net *pn = NULL; - - if (l4proto->init_net) { - ret = l4proto->init_net(net); - if (ret < 0) - goto out; - } - - pn = nf_ct_l4proto_net(net, l4proto); - if (pn == NULL) - goto out; - - pn->users++; -out: - return ret; -} - static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) { @@ -204,17 +174,6 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); -static void nf_ct_l4proto_pernet_unregister_one(struct net *net, - const struct nf_conntrack_l4proto *l4proto) -{ - struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - - if (pn == NULL) - return; - - pn->users--; -} - static void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) @@ -252,34 +211,6 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], return ret; } -static void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const l4proto[], - unsigned int num_proto) -{ - while (num_proto-- != 0) - nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); -} - -static int nf_ct_l4proto_pernet_register(struct net *net, - const struct nf_conntrack_l4proto *const l4proto[], - unsigned int num_proto) -{ - int ret = -EINVAL; - unsigned int i; - - for (i = 0; i < num_proto; i++) { - ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]); - if (ret < 0) - break; - } - if (i != num_proto) { - pr_err("nf_conntrack %d: pernet registration failed\n", - l4proto[i]->l4proto); - nf_ct_l4proto_pernet_unregister(net, l4proto, i); - } - return ret; -} - static unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, @@ -784,31 +715,25 @@ void nf_conntrack_proto_fini(void) int nf_conntrack_proto_pernet_init(struct net *net) { - int err; - struct nf_proto_net *pn = nf_ct_l4proto_net(net, - &nf_conntrack_l4proto_generic); - - err = nf_conntrack_l4proto_generic.init_net(net); - if (err < 0) - return err; - - err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - if (err < 0) - return err; - - pn->users++; + nf_conntrack_generic_init_net(net); + nf_conntrack_udp_init_net(net); + nf_conntrack_tcp_init_net(net); + nf_conntrack_icmp_init_net(net); + nf_conntrack_icmpv6_init_net(net); +#ifdef CONFIG_NF_CT_PROTO_DCCP + nf_conntrack_dccp_init_net(net); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + nf_conntrack_sctp_init_net(net); +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + nf_conntrack_gre_init_net(net); +#endif return 0; } void nf_conntrack_proto_pernet_fini(struct net *net) { - struct nf_proto_net *pn = nf_ct_l4proto_net(net, - &nf_conntrack_l4proto_generic); - - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - pn->users--; #ifdef CONFIG_NF_CT_PROTO_GRE nf_ct_gre_keymap_flush(net); #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 31ba88311bc4..6fca80587505 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -724,34 +724,24 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -static int dccp_init_net(struct net *net) +void nf_conntrack_dccp_init_net(struct net *net) { struct nf_dccp_net *dn = nf_dccp_pernet(net); - struct nf_proto_net *pn = &dn->pn; - if (!pn->users) { - /* default values */ - dn->dccp_loose = 1; - dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; + /* default values */ + dn->dccp_loose = 1; + dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. - */ - dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; - } - - return 0; -} - -static struct nf_proto_net *dccp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.dccp.pn; + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { @@ -778,6 +768,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = dccp_init_net, - .get_net_proto = dccp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 0edbf82594d0..0f526fafecae 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -60,18 +60,11 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -static int generic_init_net(struct net *net) +void nf_conntrack_generic_init_net(struct net *net) { struct nf_generic_net *gn = nf_generic_pernet(net); gn->timeout = nf_ct_generic_timeout; - - return 0; -} - -static struct nf_proto_net *generic_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.generic.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = @@ -86,6 +79,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = .nla_policy = generic_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = generic_init_net, - .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index e573ec0fa12b..fa765d2fd586 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -313,7 +313,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -static int gre_init_net(struct net *net) +void nf_conntrack_gre_init_net(struct net *net) { struct nf_gre_net *net_gre = gre_pernet(net); int i; @@ -321,8 +321,6 @@ static int gre_init_net(struct net *net) INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) net_gre->timeouts[i] = gre_timeouts[i]; - - return 0; } /* protocol helper struct */ @@ -347,5 +345,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .nla_policy = gre_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = gre_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index eb77f747759f..7df477996b16 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -298,19 +298,11 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -static int icmp_init_net(struct net *net) +void nf_conntrack_icmp_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmp_pernet(net); in->timeout = nf_ct_icmp_timeout; - - return 0; -} - -static struct nf_proto_net *icmp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.icmp.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = @@ -331,6 +323,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = .nla_policy = icmp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = icmp_init_net, - .get_net_proto = icmp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index d243ef8a128e..bec4a3211658 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -309,19 +309,11 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -static int icmpv6_init_net(struct net *net) +void nf_conntrack_icmpv6_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmpv6_pernet(net); in->timeout = nf_ct_icmpv6_timeout; - - return 0; -} - -static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.icmpv6.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = @@ -342,6 +334,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = icmpv6_init_net, - .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 31130f218d8f..a7818101ad80 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -642,29 +642,18 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -static int sctp_init_net(struct net *net) +void nf_conntrack_sctp_init_net(struct net *net) { struct nf_sctp_net *sn = nf_sctp_pernet(net); - struct nf_proto_net *pn = &sn->pn; + int i; - if (!pn->users) { - int i; + for (i = 0; i < SCTP_CONNTRACK_MAX; i++) + sn->timeouts[i] = sctp_timeouts[i]; - for (i = 0; i < SCTP_CONNTRACK_MAX; i++) - sn->timeouts[i] = sctp_timeouts[i]; - - /* timeouts[0] is unused, init it so ->timeouts[0] contains - * 'new' timeout, like udp or icmp. - */ - sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; - } - - return 0; -} - -static struct nf_proto_net *sctp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.sctp.pn; + /* timeouts[0] is unused, init it so ->timeouts[0] contains + * 'new' timeout, like udp or icmp. + */ + sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { @@ -691,6 +680,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = sctp_init_net, - .get_net_proto = sctp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 422bdedac0ed..01c748fa8913 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1387,32 +1387,21 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -static int tcp_init_net(struct net *net) +void nf_conntrack_tcp_init_net(struct net *net) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - struct nf_proto_net *pn = &tn->pn; + int i; - if (!pn->users) { - int i; + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; - for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) - tn->timeouts[i] = tcp_timeouts[i]; - - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. - */ - tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; - tn->tcp_loose = nf_ct_tcp_loose; - tn->tcp_be_liberal = nf_ct_tcp_be_liberal; - tn->tcp_max_retrans = nf_ct_tcp_max_retrans; - } - - return 0; -} - -static struct nf_proto_net *tcp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.tcp.pn; + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = @@ -1441,6 +1430,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = tcp_init_net, - .get_net_proto = tcp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 6e81e79844d7..951366dfbec3 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -260,25 +260,13 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -static int udp_init_net(struct net *net) +void nf_conntrack_udp_init_net(struct net *net) { struct nf_udp_net *un = nf_udp_pernet(net); - struct nf_proto_net *pn = &un->pn; + int i; - if (!pn->users) { - int i; - - for (i = 0; i < UDP_CT_MAX; i++) - un->timeouts[i] = udp_timeouts[i]; - } - - return 0; -} - -static struct nf_proto_net *udp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.udp.pn; + for (i = 0; i < UDP_CT_MAX; i++) + un->timeouts[i] = udp_timeouts[i]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = @@ -300,8 +288,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = udp_init_net, - .get_net_proto = udp_get_net_proto, }; #ifdef CONFIG_NF_CT_PROTO_UDPLITE @@ -324,7 +310,5 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = udp_init_net, - .get_net_proto = udp_get_net_proto, }; #endif From e56894356f60f9d11bdf53ee5a050a235f6d2b48 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:46 +0100 Subject: [PATCH 21/33] netfilter: conntrack: remove l4proto destroy hook Only one user (gre), add a direct call and remove this facility. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 --- net/netfilter/nf_conntrack_core.c | 15 +++++++++++---- net/netfilter/nf_conntrack_proto_gre.c | 14 -------------- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d5909e51ca92..5d1419ac6a38 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,9 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Called when a conntrack entry is destroyed */ - void (*destroy)(struct nf_conn *ct); - /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 728d2b5bdb1a..52e6c5c6f202 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -524,11 +524,18 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl) } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); +static void destroy_gre_conntrack(struct nf_conn *ct) +{ + struct nf_conn *master = ct->master; + + if (master) + nf_ct_gre_keymap_destroy(master); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - const struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); WARN_ON(atomic_read(&nfct->use) != 0); @@ -537,9 +544,9 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_tmpl_free(ct); return; } - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->destroy) - l4proto->destroy(ct); + + if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) + destroy_gre_conntrack(ct); local_bh_disable(); /* Expectations will have been removed in clean_from_lists, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index fa765d2fd586..ee9ab10a32e4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -249,19 +249,6 @@ int nf_conntrack_gre_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Called when a conntrack entry has already been removed from the hashes - * and is about to be deleted from memory */ -static void gre_destroy(struct nf_conn *ct) -{ - struct nf_conn *master = ct->master; - pr_debug(" entering\n"); - - if (!master) - pr_debug("no master !?!\n"); - else - nf_ct_gre_keymap_destroy(master); -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include @@ -329,7 +316,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif - .destroy = gre_destroy, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, From 4a60dc748d121b52533a2956567df4f87a3835b1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Jan 2019 22:03:47 +0100 Subject: [PATCH 22/33] netfilter: conntrack: remove nf_ct_l4proto_find_get Its now same as __nf_ct_l4proto_find(), so rename that to nf_ct_l4proto_find and use it everywhere. It never returns NULL and doesn't need locks or reference counts. Before this series: 302824 net/netfilter/nf_conntrack.ko 21504 net/netfilter/nf_conntrack_proto_gre.ko text data bss dec hex filename 6281 1732 4 8017 1f51 nf_conntrack_proto_gre.ko 108356 20613 236 129205 1f8b5 nf_conntrack.ko After: 294864 net/netfilter/nf_conntrack.ko text data bss dec hex filename 106979 19557 240 126776 1ef38 nf_conntrack.ko so, even with builtin gre, total size got reduced. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_l4proto.h | 8 +- net/netfilter/nf_conntrack_core.c | 11 +- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 14 +- net/netfilter/nf_conntrack_proto.c | 175 +++---------------- net/netfilter/nf_conntrack_standalone.c | 3 +- net/netfilter/nf_flow_table_core.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 6 +- net/netfilter/nft_ct.c | 2 +- net/netfilter/xt_CT.c | 2 +- 11 files changed, 45 insertions(+), 182 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 235c182022b2..ae41e92251dd 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -26,7 +26,7 @@ int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); -int nf_conntrack_proto_pernet_init(struct net *net); +void nf_conntrack_proto_pernet_init(struct net *net); void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 5d1419ac6a38..778087591983 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -140,13 +140,7 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO IPPROTO_UDPLITE -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); - -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); - -/* Protocol global registration. */ -int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 52e6c5c6f202..171659aa69a1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -845,7 +845,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, enum ip_conntrack_info oldinfo; struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->allow_clash && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { @@ -1117,7 +1117,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) return true; @@ -2452,15 +2452,10 @@ int nf_conntrack_init_net(struct net *net) nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_helper_pernet_init(net); + nf_conntrack_proto_pernet_init(net); - ret = nf_conntrack_proto_pernet_init(net); - if (ret < 0) - goto err_proto; return 0; -err_proto: - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); err_pcpu_lists: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3034038bfdf0..334d6e5b7762 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -610,7 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.src.l3num, expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - __nf_ct_l4proto_find(expect->tuple.dst.protonum)); + nf_ct_l4proto_find(expect->tuple.dst.protonum)); if (expect->flags & NF_CT_EXPECT_PERMANENT) { seq_puts(s, "PERMANENT"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1213beb5a714..8071bb04a849 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -134,7 +134,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb, ret = ctnetlink_dump_tuples_ip(skb, tuple); if (ret >= 0) { - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto); } rcu_read_unlock(); @@ -182,7 +182,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) struct nlattr *nest_proto; int ret; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (!l4proto->to_nlattr) return 0; @@ -590,7 +590,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct) len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1); len *= 3u; /* ORIG, REPLY, MASTER */ - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); len += l4proto->nlattr_size; if (l4proto->nlattr_tuple_size) { len4 = l4proto->nlattr_tuple_size(); @@ -1059,7 +1059,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); rcu_read_lock(); - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, @@ -1722,11 +1722,9 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, if (err < 0) return err; - rcu_read_lock(); - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->from_nlattr) err = l4proto->from_nlattr(tb, ct); - rcu_read_unlock(); return err; } @@ -2676,7 +2674,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, rcu_read_lock(); ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index e6bc02c13f0f..aa8d3fe0b37f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -43,8 +43,6 @@ extern unsigned int nf_conntrack_net_id; -static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly; - static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL @@ -95,121 +93,32 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); #endif -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto) +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) { - if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos))) - return &nf_conntrack_l4proto_generic; - - return rcu_dereference(nf_ct_protos[l4proto]); -} -EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); - -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num) -{ - const struct nf_conntrack_l4proto *p; - - rcu_read_lock(); - p = __nf_ct_l4proto_find(l4num); - rcu_read_unlock(); - - return p; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); - -static int kill_l4proto(struct nf_conn *i, void *data) -{ - const struct nf_conntrack_l4proto *l4proto; - l4proto = data; - return nf_ct_protonum(i) == l4proto->l4proto; -} - -/* FIXME: Allow NULL functions and sub in pointers to generic for - them. --RR */ -int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) -{ - int ret = 0; - - if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) || - (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) - return -EINVAL; - - mutex_lock(&nf_ct_proto_mutex); - if (rcu_dereference_protected( - nf_ct_protos[l4proto->l4proto], - lockdep_is_held(&nf_ct_proto_mutex) - ) != &nf_conntrack_l4proto_generic) { - ret = -EBUSY; - goto out_unlock; + switch (l4proto) { + case IPPROTO_UDP: return &nf_conntrack_l4proto_udp; + case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp; + case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; +#endif +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6; +#endif /* CONFIG_IPV6 */ } - rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto); -out_unlock: - mutex_unlock(&nf_ct_proto_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); - -static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) - -{ - BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos)); - - BUG_ON(rcu_dereference_protected( - nf_ct_protos[l4proto->l4proto], - lockdep_is_held(&nf_ct_proto_mutex) - ) != l4proto); - rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], - &nf_conntrack_l4proto_generic); -} - -void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) -{ - mutex_lock(&nf_ct_proto_mutex); - __nf_ct_l4proto_unregister_one(l4proto); - mutex_unlock(&nf_ct_proto_mutex); - - synchronize_net(); - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); - -static void -nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], - unsigned int num_proto) -{ - int i; - - mutex_lock(&nf_ct_proto_mutex); - for (i = 0; i < num_proto; i++) - __nf_ct_l4proto_unregister_one(l4proto[i]); - mutex_unlock(&nf_ct_proto_mutex); - - synchronize_net(); - - for (i = 0; i < num_proto; i++) - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]); -} - -static int -nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], - unsigned int num_proto) -{ - int ret = -EINVAL; - unsigned int i; - - for (i = 0; i < num_proto; i++) { - ret = nf_ct_l4proto_register_one(l4proto[i]); - if (ret < 0) - break; - } - if (i != num_proto) { - pr_err("nf_conntrack: can't register l4 %d proto.\n", - l4proto[i]->l4proto); - nf_ct_l4proto_unregister(l4proto, i); - } - return ret; -} + return &nf_conntrack_l4proto_generic; +}; +EXPORT_SYMBOL_GPL(nf_ct_l4proto_find); static unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, @@ -651,30 +560,9 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto) } EXPORT_SYMBOL_GPL(nf_ct_netns_put); -static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { - &nf_conntrack_l4proto_tcp, - &nf_conntrack_l4proto_udp, - &nf_conntrack_l4proto_icmp, -#ifdef CONFIG_NF_CT_PROTO_DCCP - &nf_conntrack_l4proto_dccp, -#endif -#ifdef CONFIG_NF_CT_PROTO_SCTP - &nf_conntrack_l4proto_sctp, -#endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - &nf_conntrack_l4proto_udplite, -#endif -#ifdef CONFIG_NF_CT_PROTO_GRE - &nf_conntrack_l4proto_gre, -#endif -#if IS_ENABLED(CONFIG_IPV6) - &nf_conntrack_l4proto_icmpv6, -#endif /* CONFIG_IPV6 */ -}; - int nf_conntrack_proto_init(void) { - int ret = 0, i; + int ret; ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) @@ -686,18 +574,8 @@ int nf_conntrack_proto_init(void) goto cleanup_sockopt; #endif - for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) - RCU_INIT_POINTER(nf_ct_protos[i], - &nf_conntrack_l4proto_generic); - - ret = nf_ct_l4proto_register(builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - if (ret < 0) - goto cleanup_sockopt2; - return ret; -cleanup_sockopt2: - nf_unregister_sockopt(&so_getorigdst); + #if IS_ENABLED(CONFIG_IPV6) cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); @@ -713,7 +591,7 @@ void nf_conntrack_proto_fini(void) #endif } -int nf_conntrack_proto_pernet_init(struct net *net) +void nf_conntrack_proto_pernet_init(struct net *net) { nf_conntrack_generic_init_net(net); nf_conntrack_udp_init_net(net); @@ -729,7 +607,6 @@ int nf_conntrack_proto_pernet_init(struct net *net) #ifdef CONFIG_NF_CT_PROTO_GRE nf_conntrack_gre_init_net(net); #endif - return 0; } void nf_conntrack_proto_pernet_fini(struct net *net) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index d848de713dc0..ddfca5f1784c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -310,8 +310,7 @@ static int ct_seq_show(struct seq_file *s, void *v) if (!net_eq(nf_ct_net(ct), net)) goto release; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - WARN_ON(!l4proto); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u ", diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index fa0844e2a68d..8099f0f778ab 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -120,7 +120,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct) if (l4num == IPPROTO_TCP) flow_offload_fixup_tcp(&ct->proto.tcp); - l4proto = __nf_ct_l4proto_find(l4num); + l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) return; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 37b4f84ac153..c69b11ca5aad 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -122,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, return -EBUSY; } - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supportted, skip. */ if (l4proto->l4proto != l4num) { @@ -357,7 +357,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, return -EINVAL; l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supported, skip. */ if (l4proto->l4proto != l4num) { @@ -438,7 +438,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); err = -EOPNOTSUPP; if (l4proto->l4proto != l4num) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 3249cc059048..7b717fad6cdc 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -870,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]); priv->l4proto = l4num; - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 2c7a4b80206f..0fa863f57575 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, /* Make sure the timeout policy matches any existing protocol tracker, * otherwise default to generic. */ - l4proto = __nf_ct_l4proto_find(proto); + l4proto = nf_ct_l4proto_find(proto); if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", From ba3fbe663635ae7b33a2d972c5d2def036258e42 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 16 Jan 2019 18:24:17 +0100 Subject: [PATCH 23/33] netfilter: nf_conntrack: provide modparam to always register conntrack hooks The connection tracking hooks can be optionally registered per netns when conntrack is specifically invoked from the ruleset since 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset"). Then, since 4d3a57f23dec ("netfilter: conntrack: do not enable connection tracking unless needed"), the default behaviour is changed to always register them on demand. This patch provides a toggle that allows users to always register them. Without this toggle, in order to use conntrack for statistics collection, you need a dummy rule that refers to conntrack, eg. iptables -I INPUT -m state --state NEW This patch allows users to restore the original behaviour via modparam, ie. always register connection tracking, eg. modprobe nf_conntrack enable_hooks=1 Hence, no dummy rule is required. Reported-by: Laura Garcia Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 28 +++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index ddfca5f1784c..8928a4d0933e 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -24,6 +24,10 @@ #include #include +static bool enable_hooks __read_mostly; +MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks"); +module_param(enable_hooks, bool, 0000); + unsigned int nf_conntrack_net_id __read_mostly; #ifdef CONFIG_NF_CONNTRACK_PROCFS @@ -1075,6 +1079,15 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ +static void nf_conntrack_fini_net(struct net *net) +{ + if (enable_hooks) + nf_ct_netns_put(net, NFPROTO_INET); + + nf_conntrack_standalone_fini_proc(net); + nf_conntrack_standalone_fini_sysctl(net); +} + static int nf_conntrack_pernet_init(struct net *net) { int ret; @@ -1093,8 +1106,16 @@ static int nf_conntrack_pernet_init(struct net *net) if (ret < 0) goto out_init_net; + if (enable_hooks) { + ret = nf_ct_netns_get(net, NFPROTO_INET); + if (ret < 0) + goto out_hooks; + } + return 0; +out_hooks: + nf_conntrack_fini_net(net); out_init_net: nf_conntrack_standalone_fini_proc(net); out_proc: @@ -1106,10 +1127,9 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { struct net *net; - list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - } + list_for_each_entry(net, net_exit_list, exit_list) + nf_conntrack_fini_net(net); + nf_conntrack_cleanup_net_list(net_exit_list); } From 0fb4d21956f4a9af225594a46857ccf29bd747bc Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 16 Jan 2019 07:53:51 +0800 Subject: [PATCH 24/33] netfilter: nft_meta: Add NFT_META_I/OIFKIND meta type In the ip_rcv the skb goes through the PREROUTING hook first, then kicks in vrf device and go through the same hook again. When conntrack dnat works with vrf, there will be some conflict with rules because the packet goes through the hook twice with different nf status. ip link add user1 type vrf table 1 ip link add user2 type vrf table 2 ip l set dev tun1 master user1 ip l set dev tun2 master user2 nft add table firewall nft add chain firewall zones { type filter hook prerouting priority - 300 \; } nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 } nft add chain firewall rule-1000-ingress nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept nft add rule firewall rule-1000-ingress counter drop nft add chain firewall rule-1000-egress nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop nft add rule firewall rule-1000-egress counter accept nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; } nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress } nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress } nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 } nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7 For a package with ip daddr 2.2.2.11 and tcp dport 22, first time accept in the rule-1000-ingress and dnat to 10.0.0.7. Then second time the packet goto the wrong chain rule-1000-egress which leads the packet drop With this patch, userspace can add the 'don't re-do entire ruleset for vrf' policy itself via: nft add rule firewall rules-all meta iifkind "vrf" counter accept Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ net/netfilter/nft_meta.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 99ca95b830b6..0ba8f48bdf0b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -791,6 +791,8 @@ enum nft_exthdr_attributes { * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) * @NFT_META_PRANDOM: a 32bit pseudo-random number * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) + * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind) */ enum nft_meta_keys { NFT_META_LEN, @@ -819,6 +821,8 @@ enum nft_meta_keys { NFT_META_CGROUP, NFT_META_PRANDOM, NFT_META_SECPATH, + NFT_META_IIFKIND, + NFT_META_OIFKIND, }; /** diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6df486c5ebd3..987d2d6ce624 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -244,6 +244,16 @@ void nft_meta_get_eval(const struct nft_expr *expr, strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); return; #endif + case NFT_META_IIFKIND: + if (in == NULL || in->rtnl_link_ops == NULL) + goto err; + strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ); + break; + case NFT_META_OIFKIND: + if (out == NULL || out->rtnl_link_ops == NULL) + goto err; + strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); + break; default: WARN_ON(1); goto err; @@ -340,6 +350,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, break; case NFT_META_IIFNAME: case NFT_META_OIFNAME: + case NFT_META_IIFKIND: + case NFT_META_OIFKIND: len = IFNAMSIZ; break; case NFT_META_PRANDOM: From 472caa69183f7eaf30ecb34451826dd5b98d7b3a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 17 Jan 2019 00:11:43 +0100 Subject: [PATCH 25/33] netfilter: nat: un-export nf_nat_used_tuple Not used since 203f2e78200c27e ("netfilter: nat: remove l4proto->unique_tuple") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 4 ---- net/netfilter/nf_nat_core.c | 3 +-- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index a17eb2f8d40e..8aff77cafb8b 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -47,10 +47,6 @@ extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); -/* Is this tuple already taken? (not by us)*/ -int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); - static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 44f97b3a215a..35e61038ae96 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -146,7 +146,7 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) } /* Is this tuple already taken? (not by us) */ -int +static int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { @@ -161,7 +161,6 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, nf_ct_invert_tuple(&reply, tuple); return nf_conntrack_tuple_taken(&reply, ignored_conntrack); } -EXPORT_SYMBOL(nf_nat_used_tuple); static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, const struct nf_nat_range2 *range) From 0123a75e1d57c3df31e536868339c98c02c14917 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Fri, 18 Jan 2019 14:36:29 +0100 Subject: [PATCH 26/33] Revert "netfilter: nft_hash: add map lookups for hashing operations" A better way to implement this from userspace has been found without specific code in the kernel side, revert this. Fixes: b9ccc07e3f31 ("netfilter: nft_hash: add map lookups for hashing operations") Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nft_hash.c | 121 ----------------------- 2 files changed, 2 insertions(+), 123 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0ba8f48bdf0b..030302893d96 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -877,8 +877,8 @@ enum nft_hash_attributes { NFTA_HASH_SEED, NFTA_HASH_OFFSET, NFTA_HASH_TYPE, - NFTA_HASH_SET_NAME, - NFTA_HASH_SET_ID, + NFTA_HASH_SET_NAME, /* deprecated */ + NFTA_HASH_SET_ID, /* deprecated */ __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c2d237144f74..ea658e6c53e3 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -25,7 +25,6 @@ struct nft_jhash { u32 modulus; u32 seed; u32 offset; - struct nft_set *map; }; static void nft_jhash_eval(const struct nft_expr *expr, @@ -42,33 +41,10 @@ static void nft_jhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } -static void nft_jhash_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - const void *data = ®s->data[priv->sreg]; - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = reciprocal_scale(jhash(data, priv->len, priv->seed), - priv->modulus) + priv->offset; - - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - struct nft_symhash { enum nft_registers dreg:8; u32 modulus; u32 offset; - struct nft_set *map; }; static void nft_symhash_eval(const struct nft_expr *expr, @@ -84,28 +60,6 @@ static void nft_symhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } -static void nft_symhash_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_symhash *priv = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = reciprocal_scale(__skb_get_hash_symmetric(skb), - priv->modulus) + priv->offset; - - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -114,9 +68,6 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, [NFTA_HASH_TYPE] = { .type = NLA_U32 }, - [NFTA_HASH_SET_NAME] = { .type = NLA_STRING, - .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_HASH_SET_ID] = { .type = NLA_U32 }, }; static int nft_jhash_init(const struct nft_ctx *ctx, @@ -166,20 +117,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_jhash_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_jhash_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_HASH_SET_NAME], - tb[NFTA_HASH_SET_ID], genmask); - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_symhash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -206,20 +143,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_symhash_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_symhash_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_HASH_SET_NAME], - tb[NFTA_HASH_SET_ID], genmask); - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_jhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -247,18 +170,6 @@ nla_put_failure: return -1; } -static int nft_jhash_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_jhash *priv = nft_expr_priv(expr); - - if (nft_jhash_dump(skb, expr) || - nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) - return -1; - - return 0; -} - static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -279,18 +190,6 @@ nla_put_failure: return -1; } -static int nft_symhash_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_symhash *priv = nft_expr_priv(expr); - - if (nft_symhash_dump(skb, expr) || - nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) - return -1; - - return 0; -} - static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -300,14 +199,6 @@ static const struct nft_expr_ops nft_jhash_ops = { .dump = nft_jhash_dump, }; -static const struct nft_expr_ops nft_jhash_map_ops = { - .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), - .eval = nft_jhash_map_eval, - .init = nft_jhash_map_init, - .dump = nft_jhash_map_dump, -}; - static const struct nft_expr_ops nft_symhash_ops = { .type = &nft_hash_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), @@ -316,14 +207,6 @@ static const struct nft_expr_ops nft_symhash_ops = { .dump = nft_symhash_dump, }; -static const struct nft_expr_ops nft_symhash_map_ops = { - .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), - .eval = nft_symhash_map_eval, - .init = nft_symhash_map_init, - .dump = nft_symhash_map_dump, -}; - static const struct nft_expr_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -336,12 +219,8 @@ nft_hash_select_ops(const struct nft_ctx *ctx, type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); switch (type) { case NFT_HASH_SYM: - if (tb[NFTA_HASH_SET_NAME]) - return &nft_symhash_map_ops; return &nft_symhash_ops; case NFT_HASH_JENKINS: - if (tb[NFTA_HASH_SET_NAME]) - return &nft_jhash_map_ops; return &nft_jhash_ops; default: break; From 81e01647fd2c6dcd592c4a005f47ba9ed5a52847 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Jan 2019 14:46:47 +0100 Subject: [PATCH 27/33] netfilter: conntrack: fix IPV6=n builds Stephen Rothwell reports: After merging the netfilter-next tree, today's linux-next build (powerpc ppc64_defconfig) failed like this: ERROR: "nf_conntrack_invert_icmpv6_tuple" [nf_conntrack.ko] undefined! ERROR: "nf_conntrack_icmpv6_packet" [nf_conntrack.ko] undefined! ERROR: "nf_conntrack_icmpv6_init_net" [nf_conntrack.ko] undefined! ERROR: "icmpv6_pkt_to_tuple" [nf_conntrack.ko] undefined! ERROR: "nf_ct_gre_keymap_destroy" [nf_conntrack.ko] undefined! icmpv6 related errors are due to lack of IS_ENABLED(CONFIG_IPV6) (no icmpv6 support is builtin if kernel has CONFIG_IPV6=n), the nf_ct_gre_keymap_destroy error is due to lack of PROTO_GRE check. Fixes: a47c54048162 ("netfilter: conntrack: handle builtin l4proto packet functions via direct calls") Fixes: e2e48b471634 ("netfilter: conntrack: handle icmp pkt_to_tuple helper via direct calls") Fixes: 197c4300aec0 ("netfilter: conntrack: remove invert_tuple callback") Fixes: 2a389de86e4a ("netfilter: conntrack: remove l4proto init and get_net callbacks") Fixes: e56894356f60 ("netfilter: conntrack: remove l4proto destroy hook") Reported-by: Stephen Rothwell Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 8 ++++++++ net/netfilter/nf_conntrack_proto.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 171659aa69a1..a3e5232c2088 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -274,8 +274,10 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.dir = IP_CT_DIR_ORIGINAL; switch (protonum) { +#if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); +#endif case IPPROTO_ICMP: return icmp_pkt_to_tuple(skb, dataoff, net, tuple); #ifdef CONFIG_NF_CT_PROTO_GRE @@ -412,8 +414,10 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, switch (orig->dst.protonum) { case IPPROTO_ICMP: return nf_conntrack_invert_icmp_tuple(inverse, orig); +#if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return nf_conntrack_invert_icmpv6_tuple(inverse, orig); +#endif } inverse->src.u.all = orig->dst.u.all; @@ -526,10 +530,12 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); static void destroy_gre_conntrack(struct nf_conn *ct) { +#ifdef CONFIG_NF_CT_PROTO_GRE struct nf_conn *master = ct->master; if (master) nf_ct_gre_keymap_destroy(master); +#endif } static void @@ -1553,8 +1559,10 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, ctinfo, state); case IPPROTO_ICMP: return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); +#if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); +#endif #ifdef CONFIG_NF_CT_PROTO_UDPLITE case IPPROTO_UDPLITE: return nf_conntrack_udplite_packet(ct, skb, dataoff, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index aa8d3fe0b37f..b9403a266a2e 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -597,7 +597,9 @@ void nf_conntrack_proto_pernet_init(struct net *net) nf_conntrack_udp_init_net(net); nf_conntrack_tcp_init_net(net); nf_conntrack_icmp_init_net(net); +#if IS_ENABLED(CONFIG_IPV6) nf_conntrack_icmpv6_init_net(net); +#endif #ifdef CONFIG_NF_CT_PROTO_DCCP nf_conntrack_dccp_init_net(net); #endif From e2f7cc72cbf42e037229d8bd998966569662442f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Jan 2019 14:46:48 +0100 Subject: [PATCH 28/33] netfilter: conntrack: fix bogus port values for other l4 protocols We must only extract l4 proto information if we can track the layer 4 protocol. Before removal of pkt_to_tuple callback, the code to extract port information was only reached for TCP/UDP/LITE/DCCP/SCTP. The other protocols were handled by the indirect call, and the 'generic' tracker took care of other protocols that have no notion of 'ports'. After removal of the callback we must be more strict here and only init port numbers for those protocols that have ports. Fixes: df5e1629087a ("netfilter: conntrack: remove pkt_to_tuple callback") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 46 +++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a3e5232c2088..815956ac5a76 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -222,6 +222,24 @@ static u32 hash_conntrack(const struct net *net, return scale_hash(hash_conntrack_raw(tuple, net)); } +static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ struct { + __be16 sport; + __be16 dport; + } _inet_hdr, *inet_hdr; + + /* Actually only need first 4 bytes to get ports. */ + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); + if (!inet_hdr) + return false; + + tuple->src.u.udp.port = inet_hdr->sport; + tuple->dst.u.udp.port = inet_hdr->dport; + return true; +} + static bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -234,10 +252,6 @@ nf_ct_get_tuple(const struct sk_buff *skb, unsigned int size; const __be32 *ap; __be32 _addrs[8]; - struct { - __be16 sport; - __be16 dport; - } _inet_hdr, *inet_hdr; memset(tuple, 0, sizeof(*tuple)); @@ -284,15 +298,25 @@ nf_ct_get_tuple(const struct sk_buff *skb, case IPPROTO_GRE: return gre_pkt_to_tuple(skb, dataoff, net, tuple); #endif + case IPPROTO_TCP: + case IPPROTO_UDP: /* fallthrough */ + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif + default: + break; } - /* Actually only need first 4 bytes to get ports. */ - inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); - if (!inet_hdr) - return false; - - tuple->src.u.udp.port = inet_hdr->sport; - tuple->dst.u.udp.port = inet_hdr->dport; return true; } From fe19a8fea7cb59f202ddd30ec2fa9f5bd907b3d2 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 19 Jan 2019 15:22:38 +0100 Subject: [PATCH 29/33] ipvs: avoid indirect calls when calculating checksums The function pointer ip_vs_protocol->csum_check is only used in protocol specific code, and never in the generic one. Remove the function pointer from struct ip_vs_protocol and call the checksum functions directly. This reduces the performance impact of the Spectre mitigation, and should give a small improvement even with RETPOLINES disabled. Signed-off-by: Matteo Croce Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 --- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 2 -- net/netfilter/ipvs/ip_vs_proto_sctp.c | 8 +++++--- net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++++++----- net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++++++----- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a0d2e0bb9a94..047f9a5ccaad 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -453,9 +453,6 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); - int (*csum_check)(int af, struct sk_buff *skb, - struct ip_vs_protocol *pp); - const char *(*state_name)(int state); void (*state_transition)(struct ip_vs_conn *cp, int direction, diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5320d39976e1..480598cb0f05 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -129,7 +129,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .conn_out_get = ah_esp_conn_out_get, .snat_handler = NULL, .dnat_handler = NULL, - .csum_check = NULL, .state_transition = NULL, .register_app = NULL, .unregister_app = NULL, @@ -152,7 +151,6 @@ struct ip_vs_protocol ip_vs_protocol_esp = { .conn_out_get = ah_esp_conn_out_get, .snat_handler = NULL, .dnat_handler = NULL, - .csum_check = NULL, .state_transition = NULL, .register_app = NULL, .unregister_app = NULL, diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index b0cd7d08f2a7..bc3d1625ecc8 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,6 +9,9 @@ #include #include +static int +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -105,7 +108,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -152,7 +155,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -587,7 +590,6 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, - .csum_check = sctp_csum_check, .state_name = sctp_state_name, .state_transition = sctp_state_transition, .app_conn_bind = sctp_app_conn_bind, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1770fc6ce960..6a275f989085 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -31,6 +31,9 @@ #include +static int +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -166,7 +169,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -192,7 +195,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -244,7 +247,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp)) return 0; /* @@ -275,7 +278,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -736,7 +739,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, - .csum_check = tcp_csum_check, .state_name = tcp_state_name, .state_transition = tcp_state_transition, .app_conn_bind = tcp_app_conn_bind, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 0f53c49025f8..3285718264d5 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -28,6 +28,9 @@ #include #include +static int +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -156,7 +159,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp)) return 0; /* @@ -186,7 +189,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -239,7 +242,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp)) return 0; /* @@ -270,7 +273,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -494,7 +497,6 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, - .csum_check = udp_csum_check, .state_transition = udp_state_transition, .state_name = udp_state_name, .register_app = udp_register_app, From 6ecd754883daffd14e1ecfc1e56aa3c070bb7a60 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 19 Jan 2019 15:25:35 +0100 Subject: [PATCH 30/33] ipvs: use indirect call wrappers Use the new indirect call wrappers in IPVS when calling the TCP or UDP protocol specific functions. This avoids an indirect calls in IPVS, and reduces the performance impact of the Spectre mitigation. Signed-off-by: Matteo Croce Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 49 +++++++++++++++++++++++----- net/netfilter/ipvs/ip_vs_proto_tcp.c | 3 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 3 +- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index fe9abf3cc10a..e969dad66991 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -53,6 +53,7 @@ #endif #include +#include EXPORT_SYMBOL(register_ip_vs_scheduler); @@ -70,6 +71,29 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); +#ifdef CONFIG_IP_VS_PROTO_TCP +INDIRECT_CALLABLE_DECLARE(int + tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP +INDIRECT_CALLABLE_DECLARE(int + udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP) +#define SNAT_CALL(f, ...) \ + INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__) +#elif defined(CONFIG_IP_VS_PROTO_TCP) +#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__) +#elif defined(CONFIG_IP_VS_PROTO_UDP) +#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__) +#else +#define SNAT_CALL(f, ...) f(__VA_ARGS__) +#endif + static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -478,7 +502,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { iph->hdr_flags ^= IP_VS_HDR_INVERSE; - cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph); + cp = INDIRECT_CALL_1(pp->conn_in_get, + ip_vs_conn_in_get_proto, svc->ipvs, + svc->af, skb, iph); iph->hdr_flags ^= IP_VS_HDR_INVERSE; if (cp) { @@ -972,7 +998,8 @@ static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1028,7 +1055,8 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1263,7 +1291,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph)) + if (pp->snat_handler && + !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph)) goto drop; #ifdef CONFIG_IP_VS_IPV6 @@ -1389,7 +1418,8 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(ipvs, af, skb, &iph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, af, skb, &iph); if (likely(cp)) { if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) @@ -1644,7 +1674,8 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, AF_INET, skb, &ciph); if (!cp) { int v; @@ -1796,7 +1827,8 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, AF_INET6, skb, &ciph); if (!cp) { int v; @@ -1925,7 +1957,8 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(ipvs, af, skb, &iph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 6a275f989085..479419759983 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -146,7 +147,7 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph, } -static int +INDIRECT_CALLABLE_SCOPE int tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 3285718264d5..646c384910fb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -136,7 +137,7 @@ udp_partial_csum_update(int af, struct udphdr *uhdr, } -static int +INDIRECT_CALLABLE_SCOPE int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { From dd03b1ad26c40195f410a4828fdac9c0b734c1ac Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 19 Jan 2019 22:50:24 +0100 Subject: [PATCH 31/33] netfilter: nft_counter: remove wrong __percpu of nft_counter_resest()'s arg nft_counter_rest() has its first argument declared as struct nft_counter_percpu_priv __percpu *priv but this structure is not percpu (it only countains a member 'counter' which is, correctly, a pointer to a percpu struct nft_counter). So, remove the '__percpu' from the argument's declaration. Signed-off-by: Luc Van Oostenryck Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index a61d7edfc290..1a6b06ce6b5b 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -104,7 +104,7 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx, nft_counter_do_destroy(priv); } -static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv, +static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; From ac088a88b5d544b7b82f00214b1588b3c88a7fc6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 23 Jan 2019 12:58:57 -0800 Subject: [PATCH 32/33] netfilter: conntrack: fix error path in nf_conntrack_pernet_init() When nf_ct_netns_get() fails, it should clean up itself, its caller doesn't need to call nf_conntrack_fini_net(). nf_conntrack_init_net() is called after registering sysctl and proc, so its cleanup function should be called before unregistering sysctl and proc. Fixes: ba3fbe663635 ("netfilter: nf_conntrack: provide modparam to always register conntrack hooks") Fixes: b884fa461776 ("netfilter: conntrack: unify sysctl handling") Reported-and-tested-by: syzbot+fcee88b2d87f0539dfe9@syzkaller.appspotmail.com Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 8928a4d0933e..c2ae14c720b4 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1115,11 +1115,11 @@ static int nf_conntrack_pernet_init(struct net *net) return 0; out_hooks: - nf_conntrack_fini_net(net); + nf_conntrack_cleanup_net(net); out_init_net: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup_net(net); + nf_conntrack_standalone_fini_sysctl(net); return ret; } From 83f529281d7aa42b10c2c5cb64fcbd2c7cab4409 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 27 Jan 2019 19:18:57 +0100 Subject: [PATCH 33/33] netfilter: ipv4: remove useless export_symbol Only one caller; place it where needed and get rid of the EXPORT_SYMBOL. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 6 ------ net/ipv4/netfilter.c | 18 ------------------ net/netfilter/utils.c | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..082e2c41b7ff 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -25,7 +25,6 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); #else static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) @@ -37,11 +36,6 @@ static inline int nf_ip_route(struct net *net, struct dst_entry **dst, { return -EOPNOTSUPP; } -static inline int nf_ip_reroute(struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - return -EOPNOTSUPP; -} #endif /* CONFIG_INET */ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..a058213b77a7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,24 +80,6 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) -{ - const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - if (!(iph->tos == rt_info->tos && - skb->mark == rt_info->mark && - iph->daddr == rt_info->daddr && - iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_ip_reroute); - int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict __always_unused) { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index e8da9a9bba73..55af9f247993 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -180,6 +180,25 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, } EXPORT_SYMBOL_GPL(nf_route); +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) +{ +#ifdef CONFIG_INET + const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) + return ip_route_me_harder(entry->state.net, skb, + RTN_UNSPEC); + } +#endif + return 0; +} + int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { const struct nf_ipv6_ops *v6ops;