diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ebb64193757..8ec48466410a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info __rcu *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } @@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table); - #ifdef CONFIG_COMPAT #include diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fdec57d862b7..5aaced6bf13e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1536,6 +1536,7 @@ struct nft_trans_flowtable { struct nft_flowtable *flowtable; bool update; struct list_head hook_list; + u32 flags; }; #define nft_trans_flowtable(trans) \ @@ -1544,6 +1545,8 @@ struct nft_trans_flowtable { (((struct nft_trans_flowtable *)trans->data)->update) #define nft_trans_flowtable_hooks(trans) \ (((struct nft_trans_flowtable *)trans->data)->hook_list) +#define nft_trans_flowtable_flags(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flags) int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c576a63d09db..d1e04d2b5170 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = xt_table_get_private_protected(table); + struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e8f6f9d86237..f15bc21d7301 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0d453fa9e327..2e2119bfcf13 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = xt_table_get_private_protected(t); + struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1469365bac7e..1d519b0e51a5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2962,6 +2962,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); m.src.u.all = mask->src.u.all; + m.src.l3num = tuple->src.l3num; m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5b05487a60d2..db11e403d818 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -218,9 +218,6 @@ int nf_conntrack_gre_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { - if (state->pf != NFPROTO_IPV4) - return -NF_ACCEPT; - if (!nf_ct_is_confirmed(ct)) { unsigned int *timeouts = nf_ct_timeout_lookup(ct); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 5fa657b8e03d..c77ba8690ed8 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -506,7 +506,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) { int err; - INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 224c8e537cb3..f57f1a6ba96f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6783,6 +6783,9 @@ static int nft_register_flowtable_net_hooks(struct net *net, list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ft, &table->flowtables, list) { + if (!nft_is_active_next(net, ft)) + continue; + list_for_each_entry(hook2, &ft->hook_list, list) { if (hook->ops.dev == hook2->ops.dev && hook->ops.pf == hook2->ops.pf) { @@ -6842,6 +6845,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, struct nft_hook *hook, *next; struct nft_trans *trans; bool unregister = false; + u32 flags; int err; err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], @@ -6856,6 +6860,17 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, } } + if (nla[NFTA_FLOWTABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); + if (flags & ~NFT_FLOWTABLE_MASK) + return -EOPNOTSUPP; + if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) + return -EOPNOTSUPP; + } else { + flags = flowtable->data.flags; + } + err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, &flowtable_hook.list, flowtable); if (err < 0) @@ -6869,6 +6884,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, goto err_flowtable_update_hook; } + nft_trans_flowtable_flags(trans) = flags; nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); @@ -6963,8 +6979,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, if (nla[NFTA_FLOWTABLE_FLAGS]) { flowtable->data.flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); - if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) + if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { + err = -EOPNOTSUPP; goto err3; + } } write_pnet(&flowtable->data.net, net); @@ -8176,6 +8194,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { + nft_trans_flowtable(trans)->data.flags = + nft_trans_flowtable_flags(trans); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bce6ca203d46..6bd31a7a27fc 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1351,14 +1351,6 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table) -{ - return rcu_dereference_protected(table->private, - mutex_is_locked(&xt[table->af].mutex)); -} -EXPORT_SYMBOL(xt_table_get_private_protected); - struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1366,6 +1358,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1375,20 +1368,47 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - private = xt_table_get_private_protected(table); + local_bh_disable(); + private = table->private; /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); + local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. + */ + smp_wmb(); + table->private = newinfo; - rcu_assign_pointer(table->private, newinfo); - synchronize_rcu(); + /* make sure all cpus see new ->private value */ + smp_mb(); + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries... + */ + local_bh_enable(); + + /* ... so wait for even xt_recseq on all cpus */ + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + u32 seq = raw_read_seqcount(s); + + if (seq & 1) { + do { + cond_resched(); + cpu_relax(); + } while (seq == raw_read_seqcount(s)); + } + } audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1424,12 +1444,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. */ - rcu_assign_pointer(table->private, bootstrap); + table->private = bootstrap; if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = xt_table_get_private_protected(table); + private = table->private; pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1452,8 +1472,7 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = xt_table_get_private_protected(table); - RCU_INIT_POINTER(table->private, NULL); + private = table->private; list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number,