Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Use nf_flow_offload_tuple() to fetch flow stats, from Paul Blakey.

2) Add new xt_IDLETIMER hard mode, from Manoj Basapathi.
   Follow-up patch to clean up this new mode, from Dan Carpenter.

3) Add support for geneve tunnel options, from Xin Long.

4) Make sets built-in and remove modular infrastructure for sets,
   from Florian Westphal.

5) Remove unused TEMPLATE_NULLS_VAL, from Li RongQing.

6) Statify nft_pipapo_get, from Chen Wandun.

7) Use C99 flexible-array member, from Gustavo A. R. Silva.

8) More descriptive variable names for bitwise, from Jeremy Sowden.

9) Four patches to add tunnel device hardware offload to the flowtable
   infrastructure, from wenxu.

10) Add support for 8-bit grouping to the pipapo set, from Stefano Brivio.

11) pipapo can switch between nibble (4-bit) and byte (8-bit) grouping,
    also from Stefano.

12) Add AVX2 vectorized version of pipapo, from Stefano Brivio.

13) Update pipapo so that it is also used for single ranges, from Stefano.

14) Add stateful expression support to elements via control plane,
    e.g. a counter per element.

15) Revisit sysctls in unprivileged namespaces: expose them read-only
    instead of hiding them, from Florian Westphal.

16) Add new egress hook, from Lukas Wunner.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2020-03-17 23:51:31 -07:00
commit a58741ef1e
52 changed files with 2781 additions and 581 deletions

View file

@ -1751,6 +1751,7 @@ enum netdev_priv_flags {
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
* egress processing
* @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
@ -2026,6 +2027,9 @@ struct net_device {
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
#endif
#ifdef CONFIG_NETFILTER_EGRESS
struct nf_hook_entries __rcu *nf_hooks_egress;
#endif
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);

View file

@ -98,7 +98,7 @@ struct ip_set_counter {
struct ip_set_comment_rcu {
struct rcu_head rcu;
char str[0];
char str[];
};
struct ip_set_comment {

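For context, the many [0] -> [] changes in this series convert the old GNU
zero-length-array idiom to C99 flexible array members, which lets the
compiler know the array is variably sized and must come last, turning
common misuses into compile-time errors. A minimal allocation sketch for a
structure like the one above; "src" and "len" are hypothetical inputs, not
part of the patch:

/* Hedged sketch: struct_size() computes sizeof(*c) plus (len + 1)
 * elements of c->str, with integer-overflow checking.
 */
struct ip_set_comment_rcu *c;

c = kmalloc(struct_size(c, str, len + 1), GFP_ATOMIC);
if (c)
	strscpy(c->str, src, len + 1);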
View file

@ -264,7 +264,7 @@ struct xt_table_info {
unsigned int stacksize;
void ***jumpstack;
unsigned char entries[0] __aligned(8);
unsigned char entries[] __aligned(8);
};
int xt_register_target(struct xt_target *target);
@ -464,7 +464,7 @@ struct compat_xt_entry_match {
} kernel;
u_int16_t match_size;
} u;
unsigned char data[0];
unsigned char data[];
};
struct compat_xt_entry_target {
@ -480,7 +480,7 @@ struct compat_xt_entry_target {
} kernel;
u_int16_t target_size;
} u;
unsigned char data[0];
unsigned char data[];
};
/* FIXME: this works only on 32 bit tasks
@ -494,7 +494,7 @@ struct compat_xt_counters {
struct compat_xt_counters_info {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t num_counters;
struct compat_xt_counters counters[0];
struct compat_xt_counters counters[];
};
struct _compat_xt_align {

View file

@ -67,7 +67,7 @@ struct compat_arpt_entry {
__u16 next_offset;
compat_uint_t comefrom;
struct compat_xt_counters counters;
unsigned char elems[0];
unsigned char elems[];
};
static inline struct xt_entry_target *

View file

@ -85,7 +85,7 @@ struct ebt_table_info {
/* room to maintain the stack used for jumping from and into udc */
struct ebt_chainstack **chainstack;
char *entries;
struct ebt_counter counters[0] ____cacheline_aligned;
struct ebt_counter counters[] ____cacheline_aligned;
};
struct ebt_table {

View file

@ -1,58 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NETFILTER_INGRESS_H_
#define _NETFILTER_INGRESS_H_
#include <linux/netfilter.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NETFILTER_INGRESS
static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
{
#ifdef CONFIG_JUMP_LABEL
if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
return false;
#endif
return rcu_access_pointer(skb->dev->nf_hooks_ingress);
}
/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
int ret;
/* Must recheck the ingress hook head, in the event it became NULL
* after the check in nf_hook_ingress_active evaluated to true.
*/
if (unlikely(!e))
return 0;
nf_hook_state_init(&state, NF_NETDEV_INGRESS,
NFPROTO_NETDEV, skb->dev, NULL, NULL,
dev_net(skb->dev), NULL);
ret = nf_hook_slow(skb, &state, e, 0);
if (ret == 0)
return -1;
return ret;
}
static inline void nf_hook_ingress_init(struct net_device *dev)
{
RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
}
#else /* CONFIG_NETFILTER_INGRESS */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
return 0;
}
static inline int nf_hook_ingress(struct sk_buff *skb)
{
return 0;
}
static inline void nf_hook_ingress_init(struct net_device *dev) {}
#endif /* CONFIG_NETFILTER_INGRESS */
#endif /* _NETFILTER_INGRESS_H_ */

View file

@ -76,7 +76,7 @@ struct compat_ipt_entry {
__u16 next_offset;
compat_uint_t comefrom;
struct compat_xt_counters counters;
unsigned char elems[0];
unsigned char elems[];
};
/* Helper functions */

View file

@ -43,7 +43,7 @@ struct compat_ip6t_entry {
__u16 next_offset;
compat_uint_t comefrom;
struct compat_xt_counters counters;
unsigned char elems[0];
unsigned char elems[];
};
static inline struct xt_entry_target *

View file

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NETFILTER_NETDEV_H_
#define _NETFILTER_NETDEV_H_
#include <linux/netfilter.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NETFILTER
static __always_inline bool nf_hook_netdev_active(enum nf_dev_hooks hooknum,
struct nf_hook_entries __rcu *hooks)
{
#ifdef CONFIG_JUMP_LABEL
if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][hooknum]))
return false;
#endif
return rcu_access_pointer(hooks);
}
/* caller must hold rcu_read_lock */
static __always_inline int nf_hook_netdev(struct sk_buff *skb,
enum nf_dev_hooks hooknum,
struct nf_hook_entries __rcu *hooks)
{
struct nf_hook_entries *e = rcu_dereference(hooks);
struct nf_hook_state state;
int ret;
/* Must recheck the hook head, in the event it became NULL
* after the check in nf_hook_netdev_active evaluated to true.
*/
if (unlikely(!e))
return 0;
nf_hook_state_init(&state, hooknum,
NFPROTO_NETDEV, skb->dev, NULL, NULL,
dev_net(skb->dev), NULL);
ret = nf_hook_slow(skb, &state, e, 0);
if (ret == 0)
return -1;
return ret;
}
#endif /* CONFIG_NETFILTER */
static inline void nf_hook_netdev_init(struct net_device *dev)
{
#ifdef CONFIG_NETFILTER_INGRESS
RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
#endif
#ifdef CONFIG_NETFILTER_EGRESS
RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
#endif
}
#ifdef CONFIG_NETFILTER_INGRESS
static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
{
return nf_hook_netdev_active(NF_NETDEV_INGRESS,
skb->dev->nf_hooks_ingress);
}
static inline int nf_hook_ingress(struct sk_buff *skb)
{
return nf_hook_netdev(skb, NF_NETDEV_INGRESS,
skb->dev->nf_hooks_ingress);
}
#else /* CONFIG_NETFILTER_INGRESS */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
return 0;
}
static inline int nf_hook_ingress(struct sk_buff *skb)
{
return 0;
}
#endif /* CONFIG_NETFILTER_INGRESS */
#ifdef CONFIG_NETFILTER_EGRESS
static inline bool nf_hook_egress_active(const struct sk_buff *skb)
{
return nf_hook_netdev_active(NF_NETDEV_EGRESS,
skb->dev->nf_hooks_egress);
}
static inline int nf_hook_egress(struct sk_buff *skb)
{
return nf_hook_netdev(skb, NF_NETDEV_EGRESS,
skb->dev->nf_hooks_egress);
}
#else /* CONFIG_NETFILTER_EGRESS */
static inline int nf_hook_egress_active(struct sk_buff *skb)
{
return 0;
}
static inline int nf_hook_egress(struct sk_buff *skb)
{
return 0;
}
#endif /* CONFIG_NETFILTER_EGRESS */
#endif /* _NETFILTER_NETDEV_H_ */

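For illustration, a minimal sketch of how a module might hook the new
egress point (hypothetical code, not part of this series; error handling
trimmed). Note that netdev-family hooks are registered per device, which
is why the diff above requires reg->dev to be set:

#include <linux/netfilter.h>
#include <linux/netdevice.h>

static unsigned int my_egress_hook(void *priv, struct sk_buff *skb,
				   const struct nf_hook_state *state)
{
	/* Inspect or mangle the outgoing skb here. */
	return NF_ACCEPT;
}

static struct nf_hook_ops egress_ops = {
	.hook		= my_egress_hook,
	.pf		= NFPROTO_NETDEV,
	.hooknum	= NF_NETDEV_EGRESS,
	.priority	= 0,
};

static int attach_egress(struct net_device *dev)
{
	/* .dev must point at the target device, in the right netns. */
	egress_ops.dev = dev;
	return nf_register_net_hook(dev_net(dev), &egress_ops);
}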
View file

@ -45,7 +45,7 @@ enum nf_ct_ext_id {
struct nf_ct_ext {
u8 offset[NF_CT_EXT_NUM];
u8 len;
char data[0];
char data[];
};
static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id)

View file

@ -14,7 +14,7 @@
struct nf_ct_timeout {
__u16 l3num;
const struct nf_conntrack_l4proto *l4proto;
char data[0];
char data[];
};
struct ctnl_timeout {

View file

@ -19,11 +19,17 @@ enum flow_offload_tuple_dir;
struct nf_flow_key {
struct flow_dissector_key_meta meta;
struct flow_dissector_key_control control;
struct flow_dissector_key_control enc_control;
struct flow_dissector_key_basic basic;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ports tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

View file

@ -224,7 +224,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
*/
struct nft_userdata {
u8 len;
unsigned char data[0];
unsigned char data[];
};
/**
@ -385,21 +385,14 @@ struct nft_set_ops {
* struct nft_set_type - nf_tables set type
*
* @ops: set ops for this type
* @list: used internally
* @owner: module reference
* @features: features supported by the implementation
*/
struct nft_set_type {
const struct nft_set_ops ops;
struct list_head list;
struct module *owner;
u32 features;
};
#define to_set_type(o) container_of(o, struct nft_set_type, ops)
int nft_register_set(struct nft_set_type *type);
void nft_unregister_set(struct nft_set_type *type);
/**
* struct nft_set - nf_tables set instance
*
@ -572,7 +565,7 @@ struct nft_set_ext_tmpl {
struct nft_set_ext {
u8 genmask;
u8 offset[NFT_SET_EXT_NUM];
char data[0];
char data[];
};
static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
@ -673,6 +666,10 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_OBJREF);
}
struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr);
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end, const u32 *data,
@ -849,8 +846,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
return (void *)expr->data;
}
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
@ -895,6 +890,18 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
return (void *)&rule->data[rule->dlen];
}
static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_expr *expr;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) {
expr = nft_set_ext_expr(ext);
expr->ops->eval(expr, regs, pkt);
}
}
/*
* The last pointer isn't really necessary, but the compiler isn't able to
* determine that the result of nft_expr_last() is always the same since it
@ -1253,9 +1260,6 @@ void nft_trace_notify(struct nft_traceinfo *info);
#define MODULE_ALIAS_NFT_EXPR(name) \
MODULE_ALIAS("nft-expr-" name)
#define MODULE_ALIAS_NFT_SET() \
MODULE_ALIAS("nft-set")
#define MODULE_ALIAS_NFT_OBJ(type) \
MODULE_ALIAS("nft-obj-" __stringify(type))
@ -1385,7 +1389,7 @@ struct nft_trans {
int msg_type;
bool put_net;
struct nft_ctx ctx;
char data[0];
char data[];
};
struct nft_trans_rule {

View file

@ -69,12 +69,13 @@ extern const struct nft_expr_ops nft_payload_fast_ops;
extern struct static_key_false nft_counters_enabled;
extern struct static_key_false nft_trace_enabled;
extern struct nft_set_type nft_set_rhash_type;
extern struct nft_set_type nft_set_hash_type;
extern struct nft_set_type nft_set_hash_fast_type;
extern struct nft_set_type nft_set_rbtree_type;
extern struct nft_set_type nft_set_bitmap_type;
extern struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_rhash_type;
extern const struct nft_set_type nft_set_hash_type;
extern const struct nft_set_type nft_set_hash_fast_type;
extern const struct nft_set_type nft_set_rbtree_type;
extern const struct nft_set_type nft_set_bitmap_type;
extern const struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_pipapo_avx2_type;
struct nft_expr;
struct nft_regs;

View file

@ -50,6 +50,7 @@ enum nf_inet_hooks {
enum nf_dev_hooks {
NF_NETDEV_INGRESS,
NF_NETDEV_EGRESS,
NF_NETDEV_NUMHOOKS
};

View file

@ -1770,6 +1770,7 @@ enum nft_tunnel_opts_attributes {
NFTA_TUNNEL_KEY_OPTS_UNSPEC,
NFTA_TUNNEL_KEY_OPTS_VXLAN,
NFTA_TUNNEL_KEY_OPTS_ERSPAN,
NFTA_TUNNEL_KEY_OPTS_GENEVE,
__NFTA_TUNNEL_KEY_OPTS_MAX
};
#define NFTA_TUNNEL_KEY_OPTS_MAX (__NFTA_TUNNEL_KEY_OPTS_MAX - 1)
@ -1791,6 +1792,15 @@ enum nft_tunnel_opts_erspan_attributes {
};
#define NFTA_TUNNEL_KEY_ERSPAN_MAX (__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1)
enum nft_tunnel_opts_geneve_attributes {
NFTA_TUNNEL_KEY_GENEVE_UNSPEC,
NFTA_TUNNEL_KEY_GENEVE_CLASS,
NFTA_TUNNEL_KEY_GENEVE_TYPE,
NFTA_TUNNEL_KEY_GENEVE_DATA,
__NFTA_TUNNEL_KEY_GENEVE_MAX
};
#define NFTA_TUNNEL_KEY_GENEVE_MAX (__NFTA_TUNNEL_KEY_GENEVE_MAX - 1)
enum nft_tunnel_flags {
NFT_TUNNEL_F_ZERO_CSUM_TX = (1 << 0),
NFT_TUNNEL_F_DONT_FRAGMENT = (1 << 1),

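As a rough illustration of how the new geneve nest might be consumed
(this is a sketch, not the actual nft_tunnel.c parser; the NLA_BINARY
length cap is an assumption):

#include <net/netlink.h>

static const struct nla_policy geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
	[NFTA_TUNNEL_KEY_GENEVE_CLASS]	= { .type = NLA_U16 },
	[NFTA_TUNNEL_KEY_GENEVE_TYPE]	= { .type = NLA_U8 },
	[NFTA_TUNNEL_KEY_GENEVE_DATA]	= { .type = NLA_BINARY, .len = 128 },
};

static int parse_geneve_opt(const struct nlattr *attr)
{
	struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
	int err;

	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr,
			       geneve_policy, NULL);
	if (err < 0)
		return err;
	if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] ||
	    !tb[NFTA_TUNNEL_KEY_GENEVE_TYPE])
		return -EINVAL;

	/* class, type and data would then be copied into a geneve_opt. */
	return 0;
}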
View file

@ -1,4 +1,3 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* linux/include/linux/netfilter/xt_IDLETIMER.h
*
@ -33,6 +32,7 @@
#include <linux/types.h>
#define MAX_IDLETIMER_LABEL_SIZE 28
#define XT_IDLETIMER_ALARM 0x01
struct idletimer_tg_info {
__u32 timeout;
@ -43,4 +43,14 @@ struct idletimer_tg_info {
struct idletimer_tg *timer __attribute__((aligned(8)));
};
struct idletimer_tg_info_v1 {
__u32 timeout;
char label[MAX_IDLETIMER_LABEL_SIZE];
__u8 timer_type;
/* for kernel module internal use only */
struct idletimer_tg *timer __attribute__((aligned(8)));
};
#endif

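For illustration, a userspace extension would fill the new v1 target info
roughly like this (hedged sketch: the timeout and label values are made
up, and XT_IDLETIMER_ALARM is assumed to select the new alarm-based
timer via the timer_type field):

#include <stdio.h>
#include <linux/netfilter/xt_IDLETIMER.h>

static void fill_idletimer_v1(struct idletimer_tg_info_v1 *info)
{
	info->timeout	 = 600;			/* seconds, illustrative */
	info->timer_type = XT_IDLETIMER_ALARM;	/* new timer mode */
	snprintf(info->label, sizeof(info->label), "wan0_idle");
}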
View file

@ -40,7 +40,7 @@ struct ebt_mac_wormhash_tuple {
struct ebt_mac_wormhash {
int table[257];
int poolsize;
struct ebt_mac_wormhash_tuple pool[0];
struct ebt_mac_wormhash_tuple pool[];
};
#define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \

View file

@ -1561,7 +1561,7 @@ struct compat_ebt_entry_mwt {
compat_uptr_t ptr;
} u;
compat_uint_t match_size;
compat_uint_t data[0] __attribute__ ((aligned (__alignof__(struct compat_ebt_replace))));
compat_uint_t data[] __aligned(__alignof__(struct compat_ebt_replace));
};
/* account for possible padding between match_size and ->data */

View file

@ -135,7 +135,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/netfilter_netdev.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
@ -3773,6 +3773,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
@ -3806,11 +3807,24 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
default:
break;
}
#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
#endif /* CONFIG_NET_EGRESS */
static inline int nf_egress(struct sk_buff *skb)
{
if (nf_hook_egress_active(skb)) {
int ret;
rcu_read_lock();
ret = nf_hook_egress(skb);
rcu_read_unlock();
return ret;
}
return 0;
}
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
struct xps_dev_maps *dev_maps, unsigned int tci)
@ -3997,13 +4011,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
# ifdef CONFIG_NET_EGRESS
#endif
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_egress(skb) < 0)
goto out;
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
}
# endif
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
@ -9850,7 +9867,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
nf_hook_ingress_init(dev);
nf_hook_netdev_init(dev);
return dev;

View file

@ -1057,7 +1057,7 @@ struct compat_arpt_replace {
u32 underflow[NF_ARP_NUMHOOKS];
u32 num_counters;
compat_uptr_t counters;
struct compat_arpt_entry entries[0];
struct compat_arpt_entry entries[];
};
static inline void compat_release_entry(struct compat_arpt_entry *e)
@ -1383,7 +1383,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
struct compat_arpt_get_entries {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_arpt_entry entrytable[0];
struct compat_arpt_entry entrytable[];
};
static int compat_get_entries(struct net *net,

View file

@ -1211,7 +1211,7 @@ struct compat_ipt_replace {
u32 underflow[NF_INET_NUMHOOKS];
u32 num_counters;
compat_uptr_t counters; /* struct xt_counters * */
struct compat_ipt_entry entries[0];
struct compat_ipt_entry entries[];
};
static int
@ -1562,7 +1562,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
struct compat_ipt_get_entries {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_ipt_entry entrytable[0];
struct compat_ipt_entry entrytable[];
};
static int

View file

@ -1227,7 +1227,7 @@ struct compat_ip6t_replace {
u32 underflow[NF_INET_NUMHOOKS];
u32 num_counters;
compat_uptr_t counters; /* struct xt_counters * */
struct compat_ip6t_entry entries[0];
struct compat_ip6t_entry entries[];
};
static int
@ -1571,7 +1571,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
struct compat_ip6t_get_entries {
char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_ip6t_entry entrytable[0];
struct compat_ip6t_entry entrytable[];
};
static int

View file

@ -10,6 +10,14 @@ config NETFILTER_INGRESS
This allows you to classify packets from ingress using the Netfilter
infrastructure.
config NETFILTER_EGRESS
bool "Netfilter egress support"
default y
select NET_EGRESS
help
This allows you to classify packets before transmission using the
Netfilter infrastructure.
config NETFILTER_NETLINK
tristate
@ -455,14 +463,6 @@ config NF_TABLES
To compile it as a module, choose M here.
if NF_TABLES
config NF_TABLES_SET
tristate "Netfilter nf_tables set infrastructure"
help
This option enables the nf_tables set infrastructure that allows to
look up for elements in a set and to build one-way mappings between
matchings and actions.
config NF_TABLES_INET
depends on IPV6
select NF_TABLES_IPV4

View file

@ -78,14 +78,17 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
nft_chain_route.o nf_tables_offload.o
nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
nf_tables_set-objs := nf_tables_set_core.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
ifdef CONFIG_X86_64
ifneq (,$(findstring -DCONFIG_AS_AVX2=1,$(KBUILD_CFLAGS)))
nf_tables-objs += nft_set_pipapo_avx2.o
endif
endif
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o

View file

@ -306,6 +306,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (dev && dev_net(dev) == net)
return &dev->nf_hooks_ingress;
}
#endif
#ifdef CONFIG_NETFILTER_EGRESS
if (hooknum == NF_NETDEV_EGRESS) {
if (dev && dev_net(dev) == net)
return &dev->nf_hooks_egress;
}
#endif
WARN_ON_ONCE(1);
return NULL;
@ -318,11 +324,13 @@ static int __nf_register_net_hook(struct net *net, int pf,
struct nf_hook_entries __rcu **pp;
if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS)
if ((!IS_ENABLED(CONFIG_NETFILTER_INGRESS) &&
reg->hooknum == NF_NETDEV_INGRESS) ||
(!IS_ENABLED(CONFIG_NETFILTER_EGRESS) &&
reg->hooknum == NF_NETDEV_EGRESS))
return -EOPNOTSUPP;
#endif
if (reg->hooknum != NF_NETDEV_INGRESS ||
if ((reg->hooknum != NF_NETDEV_INGRESS &&
reg->hooknum != NF_NETDEV_EGRESS) ||
!reg->dev || dev_net(reg->dev) != net)
return -EINVAL;
}
@ -348,6 +356,10 @@ static int __nf_register_net_hook(struct net *net, int pf,
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
#endif
#ifdef CONFIG_NETFILTER_EGRESS
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS)
net_inc_egress_queue();
#endif
#ifdef CONFIG_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
@ -406,6 +418,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_dec_ingress_queue();
#endif
#ifdef CONFIG_NETFILTER_EGRESS
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS)
net_dec_egress_queue();
#endif
#ifdef CONFIG_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
#endif

View file

@ -46,7 +46,7 @@ struct bitmap_ip {
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
unsigned char extensions[] /* data extensions */
__aligned(__alignof__(u64));
};

View file

@ -49,7 +49,7 @@ struct bitmap_ipmac {
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* MAC + data extensions */
unsigned char extensions[] /* MAC + data extensions */
__aligned(__alignof__(u64));
};

View file

@ -37,7 +37,7 @@ struct bitmap_port {
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
unsigned char extensions[] /* data extensions */
__aligned(__alignof__(u64));
};

View file

@ -76,7 +76,7 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
unsigned char value[0] /* the array of the values */
unsigned char value[] /* the array of the values */
__aligned(__alignof__(u64));
};
@ -109,7 +109,7 @@ struct htable {
u8 htable_bits; /* size of hash table == 2^htable_bits */
u32 maxelem; /* Maxelem per region */
struct ip_set_region *hregion; /* Region locks and ext sizes */
struct hbucket __rcu *bucket[0]; /* hashtable buckets */
struct hbucket __rcu *bucket[]; /* hashtable buckets */
};
#define hbucket(h, i) ((h)->bucket[i])

View file

@ -2633,7 +2633,6 @@ void nf_conntrack_init_end(void)
*/
#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
#define DYING_NULLS_VAL ((1<<30)+1)
#define TEMPLATE_NULLS_VAL ((1<<30)+2)
int nf_conntrack_init_net(struct net *net)
{

View file

@ -1054,21 +1054,18 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
nf_conntrack_standalone_init_dccp_sysctl(net, table);
nf_conntrack_standalone_init_gre_sysctl(net, table);
/* Don't export sysctls to unprivileged users */
/* Don't allow unprivileged users to alter certain sysctls */
if (net->user_ns != &init_user_ns) {
table[NF_SYSCTL_CT_MAX].procname = NULL;
table[NF_SYSCTL_CT_ACCT].procname = NULL;
table[NF_SYSCTL_CT_HELPER].procname = NULL;
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL;
#endif
table[NF_SYSCTL_CT_MAX].mode = 0444;
table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
table[NF_SYSCTL_CT_HELPER].mode = 0444;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].procname = NULL;
table[NF_SYSCTL_CT_EVENTS].mode = 0444;
#endif
}
if (!net_eq(&init_net, net))
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
} else if (!net_eq(&init_net, net)) {
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
}
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)

View file

@ -7,6 +7,7 @@
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@ -27,11 +28,61 @@ struct flow_offload_work {
(__match)->dissector.offset[__type] = \
offsetof(struct nf_flow_key, __field)
static int nf_flow_rule_match(struct nf_flow_match *match,
const struct flow_offload_tuple *tuple)
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
struct ip_tunnel_info *tun_info)
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
unsigned int enc_keys;
if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
return;
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
mask->enc_key_id.keyid = 0xffffffff;
enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
if (ip_tunnel_info_af(tun_info) == AF_INET) {
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
enc_ipv4);
key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
if (key->enc_ipv4.src)
mask->enc_ipv4.src = 0xffffffff;
if (key->enc_ipv4.dst)
mask->enc_ipv4.dst = 0xffffffff;
enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
} else {
memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
sizeof(struct in6_addr));
memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.src, &in6addr_any,
sizeof(struct in6_addr)))
memset(&key->enc_ipv6.src, 0xff,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
sizeof(struct in6_addr)))
memset(&key->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
match->dissector.used_keys |= enc_keys;
}
static int nf_flow_rule_match(struct nf_flow_match *match,
const struct flow_offload_tuple *tuple,
struct dst_entry *other_dst)
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
struct ip_tunnel_info *tun_info;
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
@ -41,6 +92,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
if (other_dst->lwtstate) {
tun_info = lwt_tun_info(other_dst->lwtstate);
nf_flow_rule_lwt_match(match, tun_info);
}
key->meta.ingress_ifindex = tuple->iifidx;
mask->meta.ingress_ifindex = 0xffffffff;
@ -419,10 +475,52 @@ static void flow_offload_redirect(const struct flow_offload *flow,
dev_hold(rt->dst.dev);
}
static void flow_offload_encap_tunnel(const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry;
struct dst_entry *dst;
dst = flow->tuplehash[dir].tuple.dst_cache;
if (dst->lwtstate) {
struct ip_tunnel_info *tun_info;
tun_info = lwt_tun_info(dst->lwtstate);
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
entry = flow_action_entry_next(flow_rule);
entry->id = FLOW_ACTION_TUNNEL_ENCAP;
entry->tunnel = tun_info;
}
}
}
static void flow_offload_decap_tunnel(const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry;
struct dst_entry *dst;
dst = flow->tuplehash[!dir].tuple.dst_cache;
if (dst->lwtstate) {
struct ip_tunnel_info *tun_info;
tun_info = lwt_tun_info(dst->lwtstate);
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
entry = flow_action_entry_next(flow_rule);
entry->id = FLOW_ACTION_TUNNEL_DECAP;
}
}
}
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
flow_offload_decap_tunnel(flow, dir, flow_rule);
flow_offload_encap_tunnel(flow, dir, flow_rule);
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
@ -449,6 +547,9 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
flow_offload_decap_tunnel(flow, dir, flow_rule);
flow_offload_encap_tunnel(flow, dir, flow_rule);
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
@ -479,6 +580,7 @@ nf_flow_offload_rule_alloc(struct net *net,
const struct flow_offload *flow = offload->flow;
const struct flow_offload_tuple *tuple;
struct nf_flow_rule *flow_rule;
struct dst_entry *other_dst;
int err = -ENOMEM;
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
@ -494,7 +596,8 @@ nf_flow_offload_rule_alloc(struct net *net,
flow_rule->rule->match.key = &flow_rule->match.key;
tuple = &flow->tuplehash[dir].tuple;
err = nf_flow_rule_match(&flow_rule->match, tuple);
other_dst = flow->tuplehash[!dir].tuple.dst_cache;
err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
if (err < 0)
goto err_flow_match;
@ -574,6 +677,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
struct nf_flow_rule *flow_rule,
enum flow_offload_tuple_dir dir,
int priority, int cmd,
struct flow_stats *stats,
struct list_head *block_cb_list)
{
struct flow_cls_offload cls_flow = {};
@ -598,6 +702,9 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
}
mutex_unlock(&flowtable->flow_block_lock);
if (cmd == FLOW_CLS_STATS)
memcpy(stats, &cls_flow.stats, sizeof(*stats));
return i;
}
@ -607,7 +714,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload,
{
return nf_flow_offload_tuple(offload->flowtable, offload->flow,
flow_rule, dir, offload->priority,
FLOW_CLS_REPLACE,
FLOW_CLS_REPLACE, NULL,
&offload->flowtable->flow_block.cb_list);
}
@ -615,7 +722,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir)
{
nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
offload->priority, FLOW_CLS_DESTROY,
offload->priority, FLOW_CLS_DESTROY, NULL,
&offload->flowtable->flow_block.cb_list);
}
@ -661,21 +768,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload,
enum flow_offload_tuple_dir dir,
struct flow_stats *stats)
{
struct nf_flowtable *flowtable = offload->flowtable;
struct flow_cls_offload cls_flow = {};
struct flow_block_cb *block_cb;
struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
nf_flow_offload_init(&cls_flow, proto, offload->priority,
FLOW_CLS_STATS,
&offload->flow->tuplehash[dir].tuple, &extack);
mutex_lock(&flowtable->flow_block_lock);
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
mutex_unlock(&flowtable->flow_block_lock);
memcpy(stats, &cls_flow.stats, sizeof(*stats));
nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
offload->priority, FLOW_CLS_STATS, stats,
&offload->flowtable->flow_block.cb_list);
}
static void flow_offload_work_stats(struct flow_offload_work *offload)
@ -820,6 +915,37 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
return err;
}
static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
struct net *net,
enum flow_block_command cmd,
struct nf_flowtable *flowtable,
struct netlink_ext_ack *extack)
{
memset(bo, 0, sizeof(*bo));
bo->net = net;
bo->block = &flowtable->flow_block;
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
INIT_LIST_HEAD(&bo->cb_list);
}
static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd,
struct netlink_ext_ack *extack)
{
nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
extack);
flow_indr_block_call(dev, bo, cmd);
if (list_empty(&bo->cb_list))
return -EOPNOTSUPP;
return 0;
}
static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
struct nf_flowtable *flowtable,
struct net_device *dev,
@ -828,17 +954,8 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
{
int err;
if (!dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
memset(bo, 0, sizeof(*bo));
bo->net = dev_net(dev);
bo->block = &flowtable->flow_block;
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
INIT_LIST_HEAD(&bo->cb_list);
nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
if (err < 0)
return err;
@ -857,7 +974,12 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
if (!nf_flowtable_hw_offload(flowtable))
return 0;
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
if (dev->netdev_ops->ndo_setup_tc)
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
&extack);
else
err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
&extack);
if (err < 0)
return err;
@ -865,10 +987,75 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev,
struct nf_flowtable *flowtable,
flow_indr_block_bind_cb_t *cb,
void *cb_priv,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
struct flow_block_offload bo;
if (!flowtable)
return;
nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable,
&extack);
cb(dev, cb_priv, TC_SETUP_FT, &bo);
nf_flow_table_block_setup(flowtable, &bo, cmd);
}
static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable,
struct net_device *dev,
flow_indr_block_bind_cb_t *cb,
void *cb_priv,
enum flow_block_command cmd)
{
if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
return;
nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd);
}
static void nf_flow_table_indr_block_cb(struct net_device *dev,
flow_indr_block_bind_cb_t *cb,
void *cb_priv,
enum flow_block_command cmd)
{
struct net *net = dev_net(dev);
struct nft_flowtable *nft_ft;
struct nft_table *table;
struct nft_hook *hook;
mutex_lock(&net->nft.commit_mutex);
list_for_each_entry(table, &net->nft.tables, list) {
list_for_each_entry(nft_ft, &table->flowtables, list) {
list_for_each_entry(hook, &nft_ft->hook_list, list) {
if (hook->ops.dev != dev)
continue;
nf_flow_table_indr_block_cb_cmd(&nft_ft->data,
dev, cb,
cb_priv, cmd);
}
}
}
mutex_unlock(&net->nft.commit_mutex);
}
static struct flow_indr_block_entry block_ing_entry = {
.cb = nf_flow_table_indr_block_cb,
.list = LIST_HEAD_INIT(block_ing_entry.list),
};
int nf_flow_table_offload_init(void)
{
INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
flow_indr_add_block_cb(&block_ing_entry);
return 0;
}
@ -877,6 +1064,8 @@ void nf_flow_table_offload_exit(void)
struct flow_offload_work *offload, *next;
LIST_HEAD(offload_pending_list);
flow_indr_del_block_cb(&block_ing_entry);
cancel_work_sync(&nf_flow_offload_work);
list_for_each_entry_safe(offload, next, &offload_pending_list, list) {

View file

@ -2523,8 +2523,8 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
module_put(type->owner);
}
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla)
static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla)
{
struct nft_expr_info info;
struct nft_expr *expr;
@ -3266,25 +3266,17 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
/*
* Sets
*/
static LIST_HEAD(nf_tables_set_types);
int nft_register_set(struct nft_set_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_add_tail_rcu(&type->list, &nf_tables_set_types);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
EXPORT_SYMBOL_GPL(nft_register_set);
void nft_unregister_set(struct nft_set_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
static const struct nft_set_type *nft_set_types[] = {
&nft_set_hash_fast_type,
&nft_set_hash_type,
&nft_set_rhash_type,
&nft_set_bitmap_type,
&nft_set_rbtree_type,
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
&nft_set_pipapo_avx2_type,
#endif
&nft_set_pipapo_type,
};
#define NFT_SET_FEATURES (NFT_SET_INTERVAL | NFT_SET_MAP | \
NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
@ -3310,15 +3302,11 @@ nft_select_set_ops(const struct nft_ctx *ctx,
struct nft_set_estimate est, best;
const struct nft_set_type *type;
u32 flags = 0;
int i;
lockdep_assert_held(&ctx->net->nft.commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (list_empty(&nf_tables_set_types)) {
if (nft_request_module(ctx->net, "nft-set") == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
if (nla[NFTA_SET_FLAGS] != NULL)
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
@ -3327,7 +3315,8 @@ nft_select_set_ops(const struct nft_ctx *ctx,
best.lookup = ~0;
best.space = ~0;
list_for_each_entry(type, &nf_tables_set_types, list) {
for (i = 0; i < ARRAY_SIZE(nft_set_types); i++) {
type = nft_set_types[i];
ops = &type->ops;
if (!nft_set_ops_candidate(type, flags))
@ -3358,11 +3347,6 @@ nft_select_set_ops(const struct nft_ctx *ctx,
break;
}
if (!try_module_get(type->owner))
continue;
if (bops != NULL)
module_put(to_set_type(bops)->owner);
bops = ops;
best = est;
}
@ -4061,10 +4045,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
size = ops->privsize(nla, &desc);
set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
if (!set) {
err = -ENOMEM;
goto err1;
}
if (!set)
return -ENOMEM;
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
if (!name) {
@ -4123,8 +4105,6 @@ err3:
kfree(set->name);
err2:
kvfree(set);
err1:
module_put(to_set_type(ops)->owner);
return err;
}
@ -4134,7 +4114,6 @@ static void nft_set_destroy(struct nft_set *set)
return;
set->ops->destroy(set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
kvfree(set);
}
@ -4312,7 +4291,6 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
.align = __alignof__(u32),
},
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);
/*
* Set elements
@ -4801,6 +4779,36 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
return trans;
}
struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr)
{
struct nft_expr *expr;
int err;
expr = nft_expr_init(ctx, attr);
if (IS_ERR(expr))
return expr;
err = -EOPNOTSUPP;
if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err_set_elem_expr;
if (expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err_set_elem_expr;
if (!set->ops->gc_init)
goto err_set_elem_expr;
set->ops->gc_init(set);
}
return expr;
err_set_elem_expr:
nft_expr_destroy(ctx, expr);
return ERR_PTR(err);
}
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *key_end,
@ -4883,6 +4891,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_elem elem;
struct nft_set_binding *binding;
struct nft_object *obj = NULL;
struct nft_expr *expr = NULL;
struct nft_userdata *udata;
struct nft_data_desc desc;
struct nft_data data;
@ -4950,10 +4959,17 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
}
if (nla[NFTA_SET_ELEM_EXPR] != NULL) {
expr = nft_set_elem_expr_alloc(ctx, set,
nla[NFTA_SET_ELEM_EXPR]);
if (IS_ERR(expr))
return PTR_ERR(expr);
}
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
return err;
goto err_set_elem_expr;
nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
@ -4972,6 +4988,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
}
if (expr)
nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPR,
expr->ops->size);
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
if (!(set->flags & NFT_SET_OBJECT)) {
err = -EINVAL;
@ -5056,6 +5076,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
*nft_set_ext_obj(ext) = obj;
obj->use++;
}
if (expr) {
memcpy(nft_set_ext_expr(ext), expr, expr->ops->size);
kfree(expr);
}
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
@ -5111,6 +5135,9 @@ err_parse_key_end:
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
err_set_elem_expr:
if (expr != NULL)
nft_expr_destroy(ctx, expr);
return err;
}
@ -5365,7 +5392,6 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
gfp_t gfp)
@ -5378,7 +5404,6 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
gcb->head.set = set;
return gcb;
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
/*
* Stateful objects

View file

@ -1,31 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <net/netfilter/nf_tables_core.h>
static int __init nf_tables_set_module_init(void)
{
nft_register_set(&nft_set_hash_fast_type);
nft_register_set(&nft_set_hash_type);
nft_register_set(&nft_set_rhash_type);
nft_register_set(&nft_set_bitmap_type);
nft_register_set(&nft_set_rbtree_type);
nft_register_set(&nft_set_pipapo_type);
return 0;
}
static void __exit nf_tables_set_module_exit(void)
{
nft_unregister_set(&nft_set_pipapo_type);
nft_unregister_set(&nft_set_rbtree_type);
nft_unregister_set(&nft_set_bitmap_type);
nft_unregister_set(&nft_set_rhash_type);
nft_unregister_set(&nft_set_hash_type);
nft_unregister_set(&nft_set_hash_fast_type);
}
module_init(nf_tables_set_module_init);
module_exit(nf_tables_set_module_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFT_SET();

View file

@ -33,7 +33,7 @@ struct nf_acct {
refcount_t refcnt;
char name[NFACCT_NAME_MAX];
struct rcu_head rcu_head;
char data[0];
char data[];
};
struct nfacct_filter {

View file

@ -93,7 +93,7 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
static int nft_bitwise_init_bool(struct nft_bitwise *priv,
const struct nlattr *const tb[])
{
struct nft_data_desc d1, d2;
struct nft_data_desc mask, xor;
int err;
if (tb[NFTA_BITWISE_DATA])
@ -103,29 +103,29 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
!tb[NFTA_BITWISE_XOR])
return -EINVAL;
err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1,
err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask,
tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) {
if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
err = -EINVAL;
goto err1;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
goto err1;
if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) {
if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
err = -EINVAL;
goto err2;
}
return 0;
err2:
nft_data_release(&priv->xor, d2.type);
nft_data_release(&priv->xor, xor.type);
err1:
nft_data_release(&priv->mask, d1.type);
nft_data_release(&priv->mask, mask.type);
return err;
}

View file

@ -277,9 +277,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
.family = NFPROTO_NETDEV,
.hook_mask = (1 << NF_NETDEV_INGRESS),
.hook_mask = (1 << NF_NETDEV_INGRESS) |
(1 << NF_NETDEV_EGRESS),
.hooks = {
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
[NF_NETDEV_EGRESS] = nft_do_chain_netdev,
},
};

View file

@ -81,7 +81,6 @@ void nft_dynset_eval(const struct nft_expr *expr,
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set *set = priv->set;
const struct nft_set_ext *ext;
const struct nft_expr *sexpr;
u64 timeout;
if (priv->op == NFT_DYNSET_OP_DELETE) {
@ -91,18 +90,13 @@ void nft_dynset_eval(const struct nft_expr *expr,
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
sexpr = NULL;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
sexpr = nft_set_ext_expr(ext);
if (priv->op == NFT_DYNSET_OP_UPDATE &&
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
}
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
nft_set_elem_update_expr(ext, regs, pkt);
if (priv->invert)
regs->verdict.code = NFT_BREAK;
@ -206,21 +200,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_EVAL))
return -EINVAL;
priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
priv->expr = nft_set_elem_expr_alloc(ctx, set,
tb[NFTA_DYNSET_EXPR]);
if (IS_ERR(priv->expr))
return PTR_ERR(priv->expr);
err = -EOPNOTSUPP;
if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err1;
if (priv->expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err1;
if (!set->ops->gc_init)
goto err1;
set->ops->gc_init(set);
}
}
nft_set_ext_prepare(&priv->tmpl);

View file

@ -43,6 +43,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
nft_set_elem_update_expr(ext, regs, pkt);
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {

View file

@ -293,8 +293,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
struct nft_set_type nft_set_bitmap_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_bitmap_type = {
.ops = {
.privsize = nft_bitmap_privsize,
.elemsize = offsetof(struct nft_bitmap_elem, ext),

View file

@ -662,8 +662,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
return true;
}
struct nft_set_type nft_set_rhash_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_rhash_type = {
.features = NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT | NFT_SET_EVAL,
.ops = {
@ -686,8 +685,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = {
},
};
struct nft_set_type nft_set_hash_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_hash_type = {
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
.privsize = nft_hash_privsize,
@ -706,8 +704,7 @@ struct nft_set_type nft_set_hash_type __read_mostly = {
},
};
struct nft_set_type nft_set_hash_fast_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_hash_fast_type = {
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
.privsize = nft_hash_privsize,

View file

@ -330,143 +330,21 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <uapi/linux/netfilter/nf_tables.h>
#include <net/ipv6.h> /* For the maximum length of a field */
#include <linux/bitmap.h>
#include <linux/bitops.h>
/* Count of concatenated fields depends on count of 32-bit nftables registers */
#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
/* Largest supported field size */
#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
/* Number of bits to be grouped together in lookup table buckets, arbitrary */
#define NFT_PIPAPO_GROUP_BITS 4
#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS)
/* Fields are padded to 32 bits in input registers */
#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \
(round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32)))
#define NFT_PIPAPO_GROUPS_PADDING(x) \
(NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE)
/* Number of buckets, given by 2 ^ n, with n grouped bits */
#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS)
/* Each n-bit range maps to up to n * 2 rules */
#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
/* Use the rest of mapping table buckets for rule indices, but it makes no sense
* to exceed 32 bits
*/
#if BITS_PER_LONG == 64
#define NFT_PIPAPO_MAP_TOBITS 32
#else
#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
#endif
/* ...which gives us the highest allowed index for a rule */
#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
- (1UL << NFT_PIPAPO_MAP_NBITS))
#define nft_pipapo_for_each_field(field, index, match) \
for ((field) = (match)->f, (index) = 0; \
(index) < (match)->field_count; \
(index)++, (field)++)
/**
* union nft_pipapo_map_bucket - Bucket of mapping table
* @to: First rule number (in next field) this rule maps to
* @n: Number of rules (in next field) this rule maps to
* @e: If there's no next field, pointer to element this rule maps to
*/
union nft_pipapo_map_bucket {
struct {
#if BITS_PER_LONG == 64
static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
u32 to;
static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
u32 n;
#else
unsigned long to:NFT_PIPAPO_MAP_TOBITS;
unsigned long n:NFT_PIPAPO_MAP_NBITS;
#endif
};
struct nft_pipapo_elem *e;
};
/**
* struct nft_pipapo_field - Lookup, mapping tables and related data for a field
* @groups: Amount of 4-bit groups
* @rules: Number of inserted rules
* @bsize: Size of each bucket in lookup table, in longs
* @lt: Lookup table: 'groups' rows of NFT_PIPAPO_BUCKETS buckets
* @mt: Mapping table: one bucket per rule
*/
struct nft_pipapo_field {
int groups;
unsigned long rules;
size_t bsize;
unsigned long *lt;
union nft_pipapo_map_bucket *mt;
};
/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
unsigned long * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[0];
};
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
/* Current working bitmap index, toggled between field matches */
static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
/**
* struct nft_pipapo - Representation of a set
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @groups: Total amount of 4-bit groups for fields in this set
* @width: Total bytes to be matched for one packet, including padding
* @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int groups;
int width;
bool dirty;
unsigned long last_gc;
};
struct nft_pipapo_elem;
/**
* struct nft_pipapo_elem - API-facing representation of single set element
* @ext: nftables API extensions
*/
struct nft_pipapo_elem {
struct nft_set_ext ext;
};
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@ -484,9 +362,8 @@ struct nft_pipapo_elem {
*
* Return: -1 on no match, bit position on 'match_only', 0 otherwise.
*/
static int pipapo_refill(unsigned long *map, int len, int rules,
unsigned long *dst, union nft_pipapo_map_bucket *mt,
bool match_only)
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
union nft_pipapo_map_bucket *mt, bool match_only)
{
unsigned long bitset;
int k, ret = -1;
@ -559,26 +436,18 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
unsigned long *lt = f->lt;
int b, group;
int b;
/* For each 4-bit group: select lookup table bucket depending on
/* For each bit group: select lookup table bucket depending on
* packet bytes value, then AND bucket value
*/
for (group = 0; group < f->groups; group += 2) {
u8 v;
if (likely(f->bb == 8))
pipapo_and_field_buckets_8bit(f, res_map, rp);
else
pipapo_and_field_buckets_4bit(f, res_map, rp);
NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
v = *rp >> 4;
__bitmap_and(res_map, res_map, lt + v * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS;
v = *rp & 0x0f;
rp++;
__bitmap_and(res_map, res_map, lt + v * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS;
}
rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
@ -621,7 +490,7 @@ next_match:
map_index = !map_index;
swap(res_map, fill_map);
rp += NFT_PIPAPO_GROUPS_PADDING(f->groups);
rp += NFT_PIPAPO_GROUPS_PADDING(f);
}
out:
@ -669,26 +538,19 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
unsigned long *lt = f->lt;
int b, group;
int b;
/* For each 4-bit group: select lookup table bucket depending on
/* For each bit group: select lookup table bucket depending on
* packet bytes value, then AND bucket value
*/
for (group = 0; group < f->groups; group++) {
u8 v;
if (f->bb == 8)
pipapo_and_field_buckets_8bit(f, res_map, data);
else if (f->bb == 4)
pipapo_and_field_buckets_4bit(f, res_map, data);
else
BUG();
if (group % 2) {
v = *data & 0x0f;
data++;
} else {
v = *data >> 4;
}
__bitmap_and(res_map, res_map, lt + v * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS;
}
data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
@ -713,7 +575,7 @@ next_match:
goto out;
}
data += NFT_PIPAPO_GROUPS_PADDING(f->groups);
data += NFT_PIPAPO_GROUPS_PADDING(f);
/* Swap bitmap indices: fill_map will be the initial bitmap for
* the next field (i.e. the new res_map), and res_map is
@ -736,8 +598,8 @@ out:
* @elem: nftables API element representation containing key data
* @flags: Unused
*/
void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
@ -763,6 +625,10 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
int group, bucket;
new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG);
#ifdef NFT_PIPAPO_ALIGN
new_bucket_size = roundup(new_bucket_size,
NFT_PIPAPO_ALIGN / sizeof(*new_lt));
#endif
if (new_bucket_size == f->bsize)
goto mt;
@ -772,15 +638,18 @@ static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
else
copy = new_bucket_size;
new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS * new_bucket_size *
sizeof(*new_lt), GFP_KERNEL);
new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) *
new_bucket_size * sizeof(*new_lt) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL);
if (!new_lt)
return -ENOMEM;
new_p = new_lt;
old_p = old_lt;
new_p = NFT_PIPAPO_LT_ALIGN(new_lt);
old_p = NFT_PIPAPO_LT_ALIGN(old_lt);
for (group = 0; group < f->groups; group++) {
for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS; bucket++) {
for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS(f->bb); bucket++) {
memcpy(new_p, old_p, copy * sizeof(*new_p));
new_p += copy;
old_p += copy;
@ -807,7 +676,7 @@ mt:
if (new_lt) {
f->bsize = new_bucket_size;
f->lt = new_lt;
NFT_PIPAPO_LT_ASSIGN(f, new_lt);
kvfree(old_lt);
}
@ -829,12 +698,195 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group,
{
unsigned long *pos;
pos = f->lt + f->bsize * NFT_PIPAPO_BUCKETS * group;
pos = NFT_PIPAPO_LT_ALIGN(f->lt);
pos += f->bsize * NFT_PIPAPO_BUCKETS(f->bb) * group;
pos += f->bsize * v;
__set_bit(rule, pos);
}
/**
* pipapo_lt_4b_to_8b() - Switch lookup table group width from 4 bits to 8 bits
* @old_groups: Number of current groups
* @bsize: Size of one bucket, in longs
* @old_lt: Pointer to the current lookup table
* @new_lt: Pointer to the new, pre-allocated lookup table
*
* Each bucket with index b in the new lookup table, belonging to group g, is
* filled with the bit intersection between:
* - bucket with index given by the upper 4 bits of b, from group g, and
* - bucket with index given by the lower 4 bits of b, from group g + 1
*
* That is, given buckets from the new lookup table N(x, y) and the old lookup
* table O(x, y), with x bucket index, and y group index:
*
* N(b, g) := O(b / 16, g) & O(b % 16, g + 1)
*
* This ensures equivalence of the matching results on lookup.
*
* (Diagram: new bucket b of 8-bit group g takes the AND of old bucket
* b / 16 from 4-bit group g * 2 and old bucket b % 16 from 4-bit group
* g * 2 + 1.)
*/
static void pipapo_lt_4b_to_8b(int old_groups, int bsize,
unsigned long *old_lt, unsigned long *new_lt)
{
int g, b, i;
for (g = 0; g < old_groups / 2; g++) {
int src_g0 = g * 2, src_g1 = g * 2 + 1;
for (b = 0; b < NFT_PIPAPO_BUCKETS(8); b++) {
int src_b0 = b / NFT_PIPAPO_BUCKETS(4);
int src_b1 = b % NFT_PIPAPO_BUCKETS(4);
int src_i0 = src_g0 * NFT_PIPAPO_BUCKETS(4) + src_b0;
int src_i1 = src_g1 * NFT_PIPAPO_BUCKETS(4) + src_b1;
for (i = 0; i < bsize; i++) {
*new_lt = old_lt[src_i0 * bsize + i] &
old_lt[src_i1 * bsize + i];
new_lt++;
}
}
}
}
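To make the merge concrete, here is a minimal userspace sketch (not kernel code; the single-rule setup and bucket sizes are made up for the example): one pair of 4-bit groups with single-long buckets is collapsed into one 8-bit group, and only the byte value that satisfied both nibble buckets keeps matching.

#include <stdio.h>

int main(void)
{
	unsigned long old_lt[2 * 16] = { 0 };	/* two 4-bit groups */
	static unsigned long new_lt[256];	/* one 8-bit group */
	int b;

	/* rule 0 matches high nibble 0xa (group 0) and low nibble 0x5
	 * (group 1): one bit set in each corresponding bucket
	 */
	old_lt[0 * 16 + 0xa] |= 1UL << 0;
	old_lt[1 * 16 + 0x5] |= 1UL << 0;

	/* N(b, g) := O(b / 16, g) & O(b % 16, g + 1) */
	for (b = 0; b < 256; b++)
		new_lt[b] = old_lt[0 * 16 + b / 16] &
			    old_lt[1 * 16 + b % 16];

	/* prints 1 for bucket 0xa5, 0 for bucket 0xa6 */
	printf("0xa5: %lx, 0xa6: %lx\n", new_lt[0xa5], new_lt[0xa6]);
	return 0;
}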
/**
* pipapo_lt_8b_to_4b() - Switch lookup table group width from 8 bits to 4 bits
* @old_groups: Number of current groups
* @bsize: Size of one bucket, in longs
* @old_lt: Pointer to the current lookup table
* @new_lt: Pointer to the new, pre-allocated lookup table
*
* Each bucket with index b in the new lookup table, belonging to group g, is
* filled with the bit union of:
* - all the buckets with index such that the upper four bits of the lower byte
* equal b, from group g, with g odd
* - all the buckets with index such that the lower four bits equal b, from
* group g, with g even
*
* That is, given buckets from the new lookup table N(x, y) and the old lookup
* table O(x, y), with x bucket index, and y group index:
*
* - with g odd: N(b, g) := U(O(x, g) for each x : x = (b & 0xf0) >> 4)
* - with g even: N(b, g) := U(O(x, g) for each x : x = b & 0x0f)
*
* where U() denotes the arbitrary union operation (binary OR of n terms). This
* ensures equivalence of the matching results on lookup.
*/
static void pipapo_lt_8b_to_4b(int old_groups, int bsize,
unsigned long *old_lt, unsigned long *new_lt)
{
int g, b, bsrc, i;
memset(new_lt, 0, old_groups * 2 * NFT_PIPAPO_BUCKETS(4) * bsize *
sizeof(unsigned long));
for (g = 0; g < old_groups * 2; g += 2) {
int src_g = g / 2;
for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) {
for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g;
bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1);
bsrc++) {
if (((bsrc & 0xf0) >> 4) != b)
continue;
for (i = 0; i < bsize; i++)
new_lt[i] |= old_lt[bsrc * bsize + i];
}
new_lt += bsize;
}
for (b = 0; b < NFT_PIPAPO_BUCKETS(4); b++) {
for (bsrc = NFT_PIPAPO_BUCKETS(8) * src_g;
bsrc < NFT_PIPAPO_BUCKETS(8) * (src_g + 1);
bsrc++) {
if ((bsrc & 0x0f) != b)
continue;
for (i = 0; i < bsize; i++)
new_lt[i] |= old_lt[bsrc * bsize + i];
}
new_lt += bsize;
}
}
}
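A companion sketch for the reverse direction (again userspace-only, single-long buckets): the rule bit of each 8-bit bucket is ORed into the 4-bit bucket selected by its high nibble for the first new group and by its low nibble for the second, so a rule matching byte 0xa5 keeps matching nibbles 0xa and 0x5.

#include <stdio.h>

int main(void)
{
	unsigned long old_lt[256] = { 0 };	/* one 8-bit group */
	unsigned long new_lt[2 * 16] = { 0 };	/* two 4-bit groups */
	int b;

	old_lt[0xa5] = 1UL << 0;	/* rule 0 matches byte 0xa5 */

	for (b = 0; b < 256; b++) {
		new_lt[0 * 16 + ((b & 0xf0) >> 4)] |= old_lt[b];
		new_lt[1 * 16 + (b & 0x0f)] |= old_lt[b];
	}

	/* both print 1: nibble 0xa in group 0, nibble 0x5 in group 1 */
	printf("0xa: %lx, 0x5: %lx\n", new_lt[0xa], new_lt[16 + 0x5]);
	return 0;
}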
/**
* pipapo_lt_bits_adjust() - Adjust group size for lookup table if needed
* @f: Field containing lookup table
*/
static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
{
unsigned long *new_lt;
int groups, bb;
size_t lt_size;
lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
sizeof(*f->lt);
if (f->bb == NFT_PIPAPO_GROUP_BITS_SMALL_SET &&
lt_size > NFT_PIPAPO_LT_SIZE_HIGH) {
groups = f->groups * 2;
bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET;
lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
sizeof(*f->lt);
} else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET &&
lt_size < NFT_PIPAPO_LT_SIZE_LOW) {
groups = f->groups / 2;
bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET;
lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
sizeof(*f->lt);
/* Don't increase group width if the resulting lookup table size
* would exceed the upper size threshold for a "small" set.
*/
if (lt_size > NFT_PIPAPO_LT_SIZE_HIGH)
return;
} else {
return;
}
new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL);
if (!new_lt)
return;
NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
if (f->bb == 4 && bb == 8) {
pipapo_lt_4b_to_8b(f->groups, f->bsize,
NFT_PIPAPO_LT_ALIGN(f->lt),
NFT_PIPAPO_LT_ALIGN(new_lt));
} else if (f->bb == 8 && bb == 4) {
pipapo_lt_8b_to_4b(f->groups, f->bsize,
NFT_PIPAPO_LT_ALIGN(f->lt),
NFT_PIPAPO_LT_ALIGN(new_lt));
} else {
BUG();
}
f->groups = groups;
f->bb = bb;
kvfree(f->lt);
NFT_PIPAPO_LT_ASSIGN(f, new_lt);
}
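A back-of-the-envelope check of the switch condition (userspace sketch; the field shape is invented): a field with 16 groups of 8 bits and 128-long buckets crosses the 2 MiB NFT_PIPAPO_LT_SIZE_HIGH threshold on a 64-bit machine, so such a table would be rebuilt with 4-bit groups.

#include <stdio.h>

int main(void)
{
	size_t groups = 16, bsize = 128, bb = 8;
	size_t lt_size = groups * (1UL << bb) * bsize *
			 sizeof(unsigned long);	/* 4 MiB with 8-byte longs */

	if (lt_size > (1UL << 21))	/* NFT_PIPAPO_LT_SIZE_HIGH */
		printf("%zu bytes: switch to 4-bit groups\n", lt_size);
	else
		printf("%zu bytes: keep 8-bit groups\n", lt_size);
	return 0;
}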
/**
* pipapo_insert() - Insert new rule in field given input key and mask length
* @f: Field containing lookup table
@ -849,7 +901,7 @@ static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group,
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
int rule = f->rules++, group, ret;
int rule = f->rules++, group, ret, bit_offset = 0;
ret = pipapo_resize(f, f->rules - 1, f->rules);
if (ret)
@ -859,28 +911,33 @@ static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int i, v;
u8 mask;
if (group % 2)
v = k[group / 2] & 0x0f;
else
v = k[group / 2] >> 4;
v = k[group / (BITS_PER_BYTE / f->bb)];
v &= GENMASK(BITS_PER_BYTE - bit_offset - 1, 0);
v >>= (BITS_PER_BYTE - bit_offset) - f->bb;
if (mask_bits >= (group + 1) * 4) {
bit_offset += f->bb;
bit_offset %= BITS_PER_BYTE;
if (mask_bits >= (group + 1) * f->bb) {
/* Not masked */
pipapo_bucket_set(f, rule, group, v);
} else if (mask_bits <= group * 4) {
} else if (mask_bits <= group * f->bb) {
/* Completely masked */
for (i = 0; i < NFT_PIPAPO_BUCKETS; i++)
for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++)
pipapo_bucket_set(f, rule, group, i);
} else {
/* The mask limit falls on this group */
mask = 0x0f >> (mask_bits - group * 4);
for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) {
mask = GENMASK(f->bb - 1, 0);
mask >>= mask_bits - group * f->bb;
for (i = 0; i < NFT_PIPAPO_BUCKETS(f->bb); i++) {
if ((i & ~mask) == (v & ~mask))
pipapo_bucket_set(f, rule, group, i);
}
}
}
pipapo_lt_bits_adjust(f);
return 1;
}
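The three masking cases above can be traced with a toy computation (userspace sketch, parameters invented): for a /20 prefix over a 32-bit key with 8-bit groups, groups 0 and 1 get one exact bucket, group 2 is split by the mask boundary, and group 3 matches every bucket.

#include <stdio.h>

int main(void)
{
	int bb = 8, mask_bits = 20, group;

	for (group = 0; group < 32 / bb; group++) {
		if (mask_bits >= (group + 1) * bb)
			printf("group %d: one exact bucket\n", group);
		else if (mask_bits <= group * bb)
			printf("group %d: all %d buckets\n", group, 1 << bb);
		else
			printf("group %d: buckets sharing the top %d bits\n",
			       group, mask_bits - group * bb);
	}
	return 0;
}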
@ -1053,8 +1110,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
for_each_possible_cpu(i) {
unsigned long *scratch;
#ifdef NFT_PIPAPO_ALIGN
unsigned long *scratch_aligned;
#endif
scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2,
scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
@ -1070,6 +1131,11 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
kfree(*per_cpu_ptr(clone->scratch, i));
*per_cpu_ptr(clone->scratch, i) = scratch;
#ifdef NFT_PIPAPO_ALIGN
scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
*per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
#endif
}
return 0;
@ -1123,11 +1189,11 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
return -ENOSPC;
if (memcmp(start_p, end_p,
f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) > 0)
f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) > 0)
return -EINVAL;
start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
/* Insert */
@ -1141,22 +1207,19 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
rulemap[i].to = f->rules;
ret = memcmp(start, end,
f->groups / NFT_PIPAPO_GROUPS_PER_BYTE);
if (!ret) {
ret = pipapo_insert(f, start,
f->groups * NFT_PIPAPO_GROUP_BITS);
} else {
ret = pipapo_expand(f, start, end,
f->groups * NFT_PIPAPO_GROUP_BITS);
}
f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f));
if (!ret)
ret = pipapo_insert(f, start, f->groups * f->bb);
else
ret = pipapo_expand(f, start, end, f->groups * f->bb);
if (f->bsize > bsize_max)
bsize_max = f->bsize;
rulemap[i].n = ret;
start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
@ -1200,23 +1263,35 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
#ifdef NFT_PIPAPO_ALIGN
new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
if (!new->scratch_aligned)
goto out_scratch;
#endif
rcu_head_init(&new->rcu);
src = old->f;
dst = new->f;
for (i = 0; i < old->field_count; i++) {
unsigned long *new_lt;
memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));
dst->lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS *
src->bsize * sizeof(*dst->lt),
GFP_KERNEL);
if (!dst->lt)
new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
src->bsize * sizeof(*dst->lt) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL);
if (!new_lt)
goto out_lt;
memcpy(dst->lt, src->lt,
NFT_PIPAPO_LT_ASSIGN(dst, new_lt);
memcpy(NFT_PIPAPO_LT_ALIGN(new_lt),
NFT_PIPAPO_LT_ALIGN(src->lt),
src->bsize * sizeof(*dst->lt) *
src->groups * NFT_PIPAPO_BUCKETS);
src->groups * NFT_PIPAPO_BUCKETS(src->bb));
dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
if (!dst->mt)
@ -1237,8 +1312,11 @@ out_lt:
kvfree(dst->lt);
dst--;
}
free_percpu(new->scratch);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(new->scratch_aligned);
#endif
out_scratch:
free_percpu(new->scratch);
kfree(new);
return ERR_PTR(-ENOMEM);
@ -1394,9 +1472,10 @@ static void pipapo_drop(struct nft_pipapo_match *m,
unsigned long *pos;
int b;
pos = f->lt + g * NFT_PIPAPO_BUCKETS * f->bsize;
pos = NFT_PIPAPO_LT_ALIGN(f->lt) + g *
NFT_PIPAPO_BUCKETS(f->bb) * f->bsize;
for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) {
for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) {
bitmap_cut(pos, pos, rulemap[i].to,
rulemap[i].n,
f->bsize * BITS_PER_LONG);
@ -1414,6 +1493,8 @@ static void pipapo_drop(struct nft_pipapo_match *m,
;
}
f->rules -= rulemap[i].n;
pipapo_lt_bits_adjust(f);
}
}
@ -1498,6 +1579,9 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
free_percpu(m->scratch);
pipapo_free_fields(m);
@ -1690,30 +1774,33 @@ static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
static int pipapo_get_boundaries(struct nft_pipapo_field *f, int first_rule,
int rule_count, u8 *left, u8 *right)
{
int g, mask_len = 0, bit_offset = 0;
u8 *l = left, *r = right;
int g, mask_len = 0;
for (g = 0; g < f->groups; g++) {
int b, x0, x1;
x0 = -1;
x1 = -1;
for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) {
for (b = 0; b < NFT_PIPAPO_BUCKETS(f->bb); b++) {
unsigned long *pos;
pos = f->lt + (g * NFT_PIPAPO_BUCKETS + b) * f->bsize;
pos = NFT_PIPAPO_LT_ALIGN(f->lt) +
(g * NFT_PIPAPO_BUCKETS(f->bb) + b) * f->bsize;
if (test_bit(first_rule, pos) && x0 == -1)
x0 = b;
if (test_bit(first_rule + rule_count - 1, pos))
x1 = b;
}
if (g % 2) {
*(l++) |= x0 & 0x0f;
*(r++) |= x1 & 0x0f;
} else {
*l |= x0 << 4;
*r |= x1 << 4;
*l |= x0 << (BITS_PER_BYTE - f->bb - bit_offset);
*r |= x1 << (BITS_PER_BYTE - f->bb - bit_offset);
bit_offset += f->bb;
if (bit_offset >= BITS_PER_BYTE) {
bit_offset %= BITS_PER_BYTE;
l++;
r++;
}
if (x1 - x0 == 0)
@ -1748,8 +1835,9 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
pipapo_get_boundaries(f, first_rule, rule_count, left, right);
return !memcmp(start, left, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) &&
!memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE);
return !memcmp(start, left,
f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f)) &&
!memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f));
}
/**
@ -1801,8 +1889,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = f->mt[start].n;
start = f->mt[start].to;
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
if (i == m->field_count) {
@ -1885,56 +1973,24 @@ static u64 nft_pipapo_privsize(const struct nlattr * const nla[],
}
/**
* nft_pipapo_estimate() - Estimate set size, space and lookup complexity
* @desc: Set description, element count and field description used here
* nft_pipapo_estimate() - Set size, space and lookup complexity
* @desc: Set description, element count and field description used
* @features: Flags: NFT_SET_INTERVAL needs to be there
* @est: Storage for estimation data
*
* The size for this set type can vary dramatically, as it depends on the number
* of rules (composing netmasks) the entries expand to. We compute the worst
* case here.
*
* In general, for a non-ranged entry or a single composing netmask, we need
* one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
* is, each input bit needs four bits of matching data), plus a bucket in the
* mapping table for each field.
*
* Return: true only for compatible range concatenations
* Return: true if set description is compatible, false otherwise
*/
static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
unsigned long entry_size;
int i;
if (!(features & NFT_SET_INTERVAL) || desc->field_count <= 1)
if (!(features & NFT_SET_INTERVAL) ||
desc->field_count < NFT_PIPAPO_MIN_FIELDS)
return false;
for (i = 0, entry_size = 0; i < desc->field_count; i++) {
unsigned long rules;
if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
return false;
/* Worst-case ranges for each concatenated field: each n-bit
* field can expand to up to n * 2 rules in each bucket, and
* each rule also needs a mapping bucket.
*/
rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
entry_size += rules * NFT_PIPAPO_BUCKETS / BITS_PER_BYTE;
entry_size += rules * sizeof(union nft_pipapo_map_bucket);
}
/* Rules in lookup and mapping tables are needed for each entry */
est->size = desc->size * entry_size;
if (est->size && div_u64(est->size, desc->size) != entry_size)
est->size = pipapo_estimate_size(desc);
if (!est->size)
return false;
est->size += sizeof(struct nft_pipapo) +
sizeof(struct nft_pipapo_match) * 2;
est->size += sizeof(struct nft_pipapo_field) * desc->field_count;
est->lookup = NFT_SET_CLASS_O_LOG_N;
est->space = NFT_SET_CLASS_O_N;
@ -1961,38 +2017,52 @@ static int nft_pipapo_init(const struct nft_set *set,
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
int err, i;
int err, i, field_count;
if (desc->field_count > NFT_PIPAPO_MAX_FIELDS)
field_count = desc->field_count ? : 1;
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
m = kmalloc(sizeof(*priv->match) + sizeof(*f) * desc->field_count,
m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
GFP_KERNEL);
if (!m)
return -ENOMEM;
m->field_count = desc->field_count;
m->field_count = field_count;
m->bsize_max = 0;
m->scratch = alloc_percpu(unsigned long *);
if (!m->scratch) {
err = -ENOMEM;
goto out_free;
goto out_scratch;
}
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
#ifdef NFT_PIPAPO_ALIGN
m->scratch_aligned = alloc_percpu(unsigned long *);
if (!m->scratch_aligned) {
err = -ENOMEM;
goto out_free;
}
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch_aligned, i) = NULL;
#endif
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
f->groups = desc->field_len[i] * NFT_PIPAPO_GROUPS_PER_BYTE;
priv->groups += f->groups;
int len = desc->field_len[i] ? : set->klen;
priv->width += round_up(desc->field_len[i], sizeof(u32));
f->bb = NFT_PIPAPO_GROUP_BITS_INIT;
f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f);
priv->width += round_up(len, sizeof(u32));
f->bsize = 0;
f->rules = 0;
f->lt = NULL;
NFT_PIPAPO_LT_ASSIGN(f, NULL);
f->mt = NULL;
}
@ -2010,7 +2080,11 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
return err;
@ -2045,16 +2119,21 @@ static void nft_pipapo_destroy(const struct nft_set *set)
nft_set_elem_destroy(set, e, true);
}
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
#endif
for_each_possible_cpu(cpu)
kfree(*per_cpu_ptr(m->scratch, cpu));
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
priv->match = NULL;
}
if (priv->clone) {
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
for_each_possible_cpu(cpu)
kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
free_percpu(priv->clone->scratch);
@ -2081,8 +2160,7 @@ static void nft_pipapo_gc_init(const struct nft_set *set)
priv->last_gc = jiffies;
}
struct nft_set_type nft_set_pipapo_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_pipapo_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT,
.ops = {
@ -2102,3 +2180,26 @@ struct nft_set_type nft_set_pipapo_type __read_mostly = {
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
const struct nft_set_type nft_set_pipapo_avx2_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT,
.ops = {
.lookup = nft_pipapo_avx2_lookup,
.insert = nft_pipapo_insert,
.activate = nft_pipapo_activate,
.deactivate = nft_pipapo_deactivate,
.flush = nft_pipapo_flush,
.remove = nft_pipapo_remove,
.walk = nft_pipapo_walk,
.get = nft_pipapo_get,
.privsize = nft_pipapo_privsize,
.estimate = nft_pipapo_avx2_estimate,
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
#endif

View file

@ -0,0 +1,280 @@
// SPDX-License-Identifier: GPL-2.0-only
#ifndef _NFT_SET_PIPAPO_H
#define _NFT_SET_PIPAPO_H
#include <linux/log2.h>
#include <net/ipv6.h> /* For the maximum length of a field */
/* Count of concatenated fields depends on count of 32-bit nftables registers */
#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
/* Restrict usage to multiple fields, make sure rbtree is used otherwise */
#define NFT_PIPAPO_MIN_FIELDS 2
/* Largest supported field size */
#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
/* Bits to be grouped together in table buckets depending on set size */
#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET
#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8
#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4
#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \
BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \
(NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb)
/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
* small group width, and switch to the big group width if the table gets
* smaller than NFT_PIPAPO_LT_SIZE_LOW.
*
* Picking 2MiB as threshold (for a single table) avoids as much as possible
* crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
* ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
* keeps performance nice in case kvmalloc() gives us non-contiguous areas.
*/
#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21)
#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16)
#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD
#define NFT_PIPAPO_LT_SIZE_LOW (NFT_PIPAPO_LT_SIZE_THRESHOLD - \
NFT_PIPAPO_LT_SIZE_HYSTERESIS)
/* Fields are padded to 32 bits in input registers */
#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \
(round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
#define NFT_PIPAPO_GROUPS_PADDING(f) \
(NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \
NFT_PIPAPO_GROUPS_PER_BYTE(f))
/* Number of buckets given by 2 ^ n, with n bucket bits */
#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb))
/* Each n-bit range maps to up to n * 2 rules */
#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
/* Use the rest of mapping table buckets for rule indices, but it makes no sense
* to exceed 32 bits
*/
#if BITS_PER_LONG == 64
#define NFT_PIPAPO_MAP_TOBITS 32
#else
#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
#endif
/* ...which gives us the highest allowed index for a rule */
#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
- (1UL << NFT_PIPAPO_MAP_NBITS))
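Worked numbers for these defaults on a 64-bit machine: NFT_PIPAPO_MAX_BITS is 128, so NFT_PIPAPO_MAP_NBITS = const_ilog2(256) = 8, NFT_PIPAPO_MAP_TOBITS = 32, and NFT_PIPAPO_RULE0_MAX = 2^31 - 2^8 = 2147483392.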
/* Definitions for vectorised implementations */
#ifdef NFT_PIPAPO_ALIGN
#define NFT_PIPAPO_ALIGN_HEADROOM \
(NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
#define NFT_PIPAPO_LT_ASSIGN(field, x) \
do { \
(field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
(field)->lt = (x); \
} while (0)
#else
#define NFT_PIPAPO_ALIGN_HEADROOM 0
#define NFT_PIPAPO_LT_ALIGN(lt) (lt)
#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
#endif /* NFT_PIPAPO_ALIGN */
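The headroom trick can be illustrated in userspace (a sketch, not the kernel macros; the kernel variant subtracts ARCH_KMALLOC_MINALIGN because kmalloc() already guarantees that much alignment):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_ALIGN 32	/* stand-in for NFT_PIPAPO_ALIGN (YMM width) */

int main(void)
{
	unsigned char *raw = malloc(1024 + DEMO_ALIGN - 1);
	unsigned char *aligned;

	if (!raw)
		return 1;
	/* same idea as PTR_ALIGN(): round up to the next boundary */
	aligned = (unsigned char *)(((uintptr_t)raw + DEMO_ALIGN - 1) &
				    ~(uintptr_t)(DEMO_ALIGN - 1));
	printf("raw %p, aligned %p\n", (void *)raw, (void *)aligned);
	free(raw);
	return 0;
}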
#define nft_pipapo_for_each_field(field, index, match) \
for ((field) = (match)->f, (index) = 0; \
(index) < (match)->field_count; \
(index)++, (field)++)
/**
* union nft_pipapo_map_bucket - Bucket of mapping table
* @to: First rule number (in next field) this rule maps to
* @n: Number of rules (in next field) this rule maps to
* @e: If there's no next field, pointer to element this rule maps to
*/
union nft_pipapo_map_bucket {
struct {
#if BITS_PER_LONG == 64
static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
u32 to;
static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
u32 n;
#else
unsigned long to:NFT_PIPAPO_MAP_TOBITS;
unsigned long n:NFT_PIPAPO_MAP_NBITS;
#endif
};
struct nft_pipapo_elem *e;
};
/**
* struct nft_pipapo_field - Lookup, mapping tables and related data for a field
* @groups: Amount of bit groups
* @rules: Number of inserted rules
* @bsize: Size of each bucket in lookup table, in longs
* @bb: Number of bits grouped together in lookup table buckets
* @lt: Lookup table: 'groups' rows of buckets
* @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
* @mt: Mapping table: one bucket per rule
*/
struct nft_pipapo_field {
int groups;
unsigned long rules;
size_t bsize;
int bb;
#ifdef NFT_PIPAPO_ALIGN
unsigned long *lt_aligned;
#endif
unsigned long *lt;
union nft_pipapo_map_bucket *mt;
};
/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count: Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
* @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu: Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
#ifdef NFT_PIPAPO_ALIGN
unsigned long * __percpu *scratch_aligned;
#endif
unsigned long * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[];
};
/**
* struct nft_pipapo - Representation of a set
* @match: Currently in-use matching data
* @clone: Copy where pending insertions and deletions are kept
* @width: Total bytes to be matched for one packet, including padding
* @dirty: Working copy has pending insertions or deletions
* @last_gc: Timestamp of last garbage collection run, jiffies
*/
struct nft_pipapo {
struct nft_pipapo_match __rcu *match;
struct nft_pipapo_match *clone;
int width;
bool dirty;
unsigned long last_gc;
};
struct nft_pipapo_elem;
/**
* struct nft_pipapo_elem - API-facing representation of single set element
* @ext: nftables API extensions
*/
struct nft_pipapo_elem {
struct nft_set_ext ext;
};
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
union nft_pipapo_map_bucket *mt, bool match_only);
/**
* pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
* @f: Field including lookup table
* @dst: Area to store result
* @data: Input data selecting table buckets
*/
static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
unsigned long *dst,
const u8 *data)
{
unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
int group;
for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
u8 v;
v = *data >> 4;
__bitmap_and(dst, dst, lt + v * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
v = *data & 0x0f;
__bitmap_and(dst, dst, lt + v * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
}
}
/**
* pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
* @f: Field including lookup table
* @dst: Area to store result
* @data: Input data selecting table buckets
*/
static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
unsigned long *dst,
const u8 *data)
{
unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
int group;
for (group = 0; group < f->groups; group++, data++) {
__bitmap_and(dst, dst, lt + *data * f->bsize,
f->bsize * BITS_PER_LONG);
lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
}
}
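Both helpers implement the same core idea, which a toy userspace sketch makes visible (single-long buckets, two groups, rule numbers invented): the bucket bitmaps selected by successive packet bytes are ANDed, and the surviving bits are the rules matching all groups seen so far.

#include <stdio.h>

int main(void)
{
	unsigned long g0 = 0x6;		/* rules 1 and 2 match group 0 */
	unsigned long g1 = 0x3;		/* rules 0 and 1 match group 1 */
	unsigned long res = ~0UL;

	res &= g0;
	res &= g1;
	printf("matching rules bitmap: %lx\n", res);	/* 0x2: rule 1 */
	return 0;
}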
/**
* pipapo_estimate_size() - Estimate worst-case for set size
* @desc: Set description, element count and field description used here
*
* The size for this set type can vary dramatically, as it depends on the number
* of rules (composing netmasks) the entries expand to. We compute the worst
* case here.
*
* In general, for a non-ranged entry or a single composing netmask, we need
* one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
* is, each input bit needs four bits of matching data), plus a bucket in the
* mapping table for each field.
*
* Return: worst-case set size in bytes, 0 on any overflow
*/
static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
{
unsigned long entry_size;
u64 size;
int i;
for (i = 0, entry_size = 0; i < desc->field_count; i++) {
unsigned long rules;
if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
return 0;
/* Worst-case ranges for each concatenated field: each n-bit
* field can expand to up to n * 2 rules in each bucket, and
* each rule also needs a mapping bucket.
*/
rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
entry_size += rules *
NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
BITS_PER_BYTE;
entry_size += rules * sizeof(union nft_pipapo_map_bucket);
}
/* Rules in lookup and mapping tables are needed for each entry */
size = desc->size * entry_size;
if (size && div_u64(size, desc->size) != entry_size)
return 0;
size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
size += sizeof(struct nft_pipapo_field) * desc->field_count;
return size;
}
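For instance (a sketch of the arithmetic only, assuming the initial 8-bit grouping and the 8-byte mapping buckets of a 64-bit build): a concatenation of an IPv4 address and a port yields ilog2(32) * 2 + ilog2(16) * 2 = 18 worst-case rules, that is, 18 * 256 / 8 = 576 bytes of lookup table plus 18 * 8 = 144 bytes of mapping buckets, 720 bytes per element.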
#endif /* _NFT_SET_PIPAPO_H */

File diff suppressed because it is too large

View file

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _NFT_SET_PIPAPO_AVX2_H
#define _NFT_SET_PIPAPO_AVX2_H
#ifdef CONFIG_AS_AVX2
#include <asm/fpu/xstate.h>
#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE)
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est);
#endif /* CONFIG_AS_AVX2 */
#endif /* _NFT_SET_PIPAPO_AVX2_H */

View file

@ -481,8 +481,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
struct nft_set_type nft_set_rbtree_type __read_mostly = {
.owner = THIS_MODULE,
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
.privsize = nft_rbtree_privsize,

View file

@ -11,6 +11,7 @@
#include <net/ip_tunnels.h>
#include <net/vxlan.h>
#include <net/erspan.h>
#include <net/geneve.h>
struct nft_tunnel {
enum nft_tunnel_keys key:8;
@ -144,6 +145,7 @@ struct nft_tunnel_opts {
union {
struct vxlan_metadata vxlan;
struct erspan_metadata erspan;
u8 data[IP_TUNNEL_OPTS_MAX];
} u;
u32 len;
__be16 flags;
@ -301,9 +303,53 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
return 0;
}
static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
[NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 },
[NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 },
[NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
};
static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
struct nft_tunnel_opts *opts)
{
struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len);
struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
int err, data_len;
err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr,
nft_tunnel_opts_geneve_policy, NULL);
if (err < 0)
return err;
if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] ||
!tb[NFTA_TUNNEL_KEY_GENEVE_TYPE] ||
!tb[NFTA_TUNNEL_KEY_GENEVE_DATA])
return -EINVAL;
attr = tb[NFTA_TUNNEL_KEY_GENEVE_DATA];
data_len = nla_len(attr);
if (data_len % 4)
return -EINVAL;
opts->len += sizeof(*opt) + data_len;
if (opts->len > IP_TUNNEL_OPTS_MAX)
return -EINVAL;
memcpy(opt->opt_data, nla_data(attr), data_len);
opt->length = data_len / 4;
opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
opts->flags = TUNNEL_GENEVE_OPT;
return 0;
}
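For reference, consecutive options accumulate at opts->len offsets in the flat opts->u.data buffer; the userspace sketch below mimics that packing with a simplified stand-in for struct geneve_opt (the real structure uses bitfields for the length and reserved bits):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_geneve_opt {	/* simplified stand-in */
	uint16_t opt_class;
	uint8_t type;
	uint8_t length;		/* data length in 4-byte units */
	uint8_t opt_data[];
};

int main(void)
{
	uint8_t buf[64];
	uint32_t payload = 0xdeadbeef;
	int len = 0;

	struct demo_geneve_opt *opt = (struct demo_geneve_opt *)(buf + len);

	opt->opt_class = 0x0102;
	opt->type = 0x80;
	opt->length = sizeof(payload) / 4;
	memcpy(opt->opt_data, &payload, sizeof(payload));
	len += sizeof(*opt) + sizeof(payload);

	printf("buffer used: %d bytes\n", len);	/* 8 */
	return 0;
}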
static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = {
[NFTA_TUNNEL_KEY_OPTS_UNSPEC] = {
.strict_start_type = NFTA_TUNNEL_KEY_OPTS_GENEVE },
[NFTA_TUNNEL_KEY_OPTS_VXLAN] = { .type = NLA_NESTED, },
[NFTA_TUNNEL_KEY_OPTS_ERSPAN] = { .type = NLA_NESTED, },
[NFTA_TUNNEL_KEY_OPTS_GENEVE] = { .type = NLA_NESTED, },
};
static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
@ -311,22 +357,43 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
struct ip_tunnel_info *info,
struct nft_tunnel_opts *opts)
{
struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1];
int err;
int err, rem, type = 0;
struct nlattr *nla;
err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr,
nft_tunnel_opts_policy, NULL);
err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
nft_tunnel_opts_policy, NULL);
if (err < 0)
return err;
if (tb[NFTA_TUNNEL_KEY_OPTS_VXLAN]) {
err = nft_tunnel_obj_vxlan_init(tb[NFTA_TUNNEL_KEY_OPTS_VXLAN],
opts);
} else if (tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN]) {
err = nft_tunnel_obj_erspan_init(tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN],
opts);
} else {
return -EOPNOTSUPP;
nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
switch (nla_type(nla)) {
case NFTA_TUNNEL_KEY_OPTS_VXLAN:
if (type)
return -EINVAL;
err = nft_tunnel_obj_vxlan_init(nla, opts);
if (err)
return err;
type = TUNNEL_VXLAN_OPT;
break;
case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
if (type)
return -EINVAL;
err = nft_tunnel_obj_erspan_init(nla, opts);
if (err)
return err;
type = TUNNEL_ERSPAN_OPT;
break;
case NFTA_TUNNEL_KEY_OPTS_GENEVE:
if (type && type != TUNNEL_GENEVE_OPT)
return -EINVAL;
err = nft_tunnel_obj_geneve_init(nla, opts);
if (err)
return err;
type = TUNNEL_GENEVE_OPT;
break;
default:
return -EOPNOTSUPP;
}
}
return err;
@ -518,6 +585,25 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
break;
}
nla_nest_end(skb, inner);
} else if (opts->flags & TUNNEL_GENEVE_OPT) {
struct geneve_opt *opt;
int offset = 0;
inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
if (!inner)
goto failure;
while (opts->len > offset) {
opt = (struct geneve_opt *)(opts->u.data + offset);
if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE,
opt->type) ||
nla_put(skb, NFTA_TUNNEL_KEY_GENEVE_DATA,
opt->length * 4, opt->opt_data))
goto inner_failure;
offset += sizeof(*opt) + opt->length * 4;
}
nla_nest_end(skb, inner);
}
nla_nest_end(skb, nest);
return 0;

View file

@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/alarmtimer.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/netfilter.h>
@ -30,6 +31,7 @@
struct idletimer_tg {
struct list_head entry;
struct alarm alarm;
struct timer_list timer;
struct work_struct work;
@ -37,6 +39,7 @@ struct idletimer_tg {
struct device_attribute attr;
unsigned int refcnt;
u8 timer_type;
};
static LIST_HEAD(idletimer_tg_list);
@ -62,20 +65,29 @@ static ssize_t idletimer_tg_show(struct device *dev,
{
struct idletimer_tg *timer;
unsigned long expires = 0;
struct timespec64 ktimespec = {};
long time_diff = 0;
mutex_lock(&list_mutex);
timer = __idletimer_tg_find_by_label(attr->attr.name);
if (timer)
expires = timer->timer.expires;
if (timer) {
if (timer->timer_type & XT_IDLETIMER_ALARM) {
ktime_t expires_alarm = alarm_expires_remaining(&timer->alarm);
ktimespec = ktime_to_timespec64(expires_alarm);
time_diff = ktimespec.tv_sec;
} else {
expires = timer->timer.expires;
time_diff = jiffies_to_msecs(expires - jiffies) / 1000;
}
}
mutex_unlock(&list_mutex);
if (time_after(expires, jiffies))
return sprintf(buf, "%u\n",
jiffies_to_msecs(expires - jiffies) / 1000);
if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
return sprintf(buf, "0\n");
return snprintf(buf, PAGE_SIZE, "0\n");
}
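Userspace can poll the remaining time through the sysfs attribute created per label; a minimal reader sketch (the label "timer1" is hypothetical, and the path assumes the module's "xt_idletimer" class and "timers" device names):

#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/class/xt_idletimer/timers/timer1", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("seconds left: %s", buf);
	fclose(f);
	return 0;
}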
static void idletimer_tg_work(struct work_struct *work)
@ -95,6 +107,16 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(&timer->work);
}
static enum alarmtimer_restart idletimer_tg_alarmproc(struct alarm *alarm,
ktime_t now)
{
struct idletimer_tg *timer = alarm->data;
pr_debug("alarm %s expired\n", timer->attr.attr.name);
schedule_work(&timer->work);
return ALARMTIMER_NORESTART;
}
static int idletimer_check_sysfs_name(const char *name, unsigned int size)
{
int ret;
@ -160,6 +182,68 @@ out:
return ret;
}
static int idletimer_tg_create_v1(struct idletimer_tg_info_v1 *info)
{
int ret;
info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
if (!info->timer) {
ret = -ENOMEM;
goto out;
}
ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
if (ret < 0)
goto out_free_timer;
sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
ret = -ENOMEM;
goto out_free_timer;
}
info->timer->attr.attr.mode = 0444;
info->timer->attr.show = idletimer_tg_show;
ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
if (ret < 0) {
pr_debug("couldn't add file to sysfs");
goto out_free_attr;
}
/* notify userspace */
kobject_uevent(idletimer_tg_kobj, KOBJ_ADD);
list_add(&info->timer->entry, &idletimer_tg_list);
pr_debug("timer type value is %u", info->timer_type);
info->timer->timer_type = info->timer_type;
info->timer->refcnt = 1;
INIT_WORK(&info->timer->work, idletimer_tg_work);
if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
ktime_t tout;
alarm_init(&info->timer->alarm, ALARM_BOOTTIME,
idletimer_tg_alarmproc);
info->timer->alarm.data = info->timer;
tout = ktime_set(info->timeout, 0);
alarm_start_relative(&info->timer->alarm, tout);
} else {
timer_setup(&info->timer->timer, idletimer_tg_expired, 0);
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
}
return 0;
out_free_attr:
kfree(info->timer->attr.attr.name);
out_free_timer:
kfree(info->timer);
out:
return ret;
}
/*
* The actual xt_tables plugin.
*/
@ -177,13 +261,30 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
return XT_CONTINUE;
}
static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
/*
* The actual xt_tables plugin.
*/
static unsigned int idletimer_tg_target_v1(struct sk_buff *skb,
const struct xt_action_param *par)
{
struct idletimer_tg_info *info = par->targinfo;
int ret;
const struct idletimer_tg_info_v1 *info = par->targinfo;
pr_debug("checkentry targinfo%s\n", info->label);
pr_debug("resetting timer %s, timeout period %u\n",
info->label, info->timeout);
if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
ktime_t tout = ktime_set(info->timeout, 0);
alarm_start_relative(&info->timer->alarm, tout);
} else {
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
}
return XT_CONTINUE;
}
static int idletimer_tg_helper(struct idletimer_tg_info *info)
{
if (info->timeout == 0) {
pr_debug("timeout value is zero\n");
return -EINVAL;
@ -198,7 +299,23 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
pr_debug("label is empty or not nul-terminated\n");
return -EINVAL;
}
return 0;
}
static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
{
struct idletimer_tg_info *info = par->targinfo;
int ret;
pr_debug("checkentry targinfo%s\n", info->label);
ret = idletimer_tg_helper(info);
if (ret < 0) {
pr_debug("checkentry helper returned an error\n");
return -EINVAL;
}
mutex_lock(&list_mutex);
info->timer = __idletimer_tg_find_by_label(info->label);
@ -222,6 +339,65 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
return 0;
}
static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par)
{
struct idletimer_tg_info_v1 *info = par->targinfo;
int ret;
pr_debug("checkentry targinfo%s\n", info->label);
ret = idletimer_tg_helper((struct idletimer_tg_info *)info);
if (ret < 0) {
pr_debug("checkentry helper returned an error\n");
return -EINVAL;
}
if (info->timer_type > XT_IDLETIMER_ALARM) {
pr_debug("invalid value for timer type\n");
return -EINVAL;
}
mutex_lock(&list_mutex);
info->timer = __idletimer_tg_find_by_label(info->label);
if (info->timer) {
if (info->timer->timer_type != info->timer_type) {
pr_debug("Adding/Replacing rule with same label and different timer type is not allowed\n");
mutex_unlock(&list_mutex);
return -EINVAL;
}
info->timer->refcnt++;
if (info->timer_type & XT_IDLETIMER_ALARM) {
/* calculate remaining expiry time */
ktime_t tout = alarm_expires_remaining(&info->timer->alarm);
struct timespec64 ktimespec = ktime_to_timespec64(tout);
if (ktimespec.tv_sec > 0) {
pr_debug("time_expiry_remaining %lld\n",
ktimespec.tv_sec);
alarm_start_relative(&info->timer->alarm, tout);
}
} else {
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
}
pr_debug("increased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
} else {
ret = idletimer_tg_create_v1(info);
if (ret < 0) {
pr_debug("failed to create timer\n");
mutex_unlock(&list_mutex);
return ret;
}
}
mutex_unlock(&list_mutex);
return 0;
}
static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
{
const struct idletimer_tg_info *info = par->targinfo;
@ -247,7 +423,38 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
mutex_unlock(&list_mutex);
}
static struct xt_target idletimer_tg __read_mostly = {
static void idletimer_tg_destroy_v1(const struct xt_tgdtor_param *par)
{
const struct idletimer_tg_info_v1 *info = par->targinfo;
pr_debug("destroy targinfo %s\n", info->label);
mutex_lock(&list_mutex);
if (--info->timer->refcnt == 0) {
pr_debug("deleting timer %s\n", info->label);
list_del(&info->timer->entry);
if (info->timer->timer_type & XT_IDLETIMER_ALARM) {
alarm_cancel(&info->timer->alarm);
} else {
del_timer_sync(&info->timer->timer);
}
cancel_work_sync(&info->timer->work);
sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
kfree(info->timer->attr.attr.name);
kfree(info->timer);
} else {
pr_debug("decreased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
}
mutex_unlock(&list_mutex);
}
static struct xt_target idletimer_tg[] __read_mostly = {
{
.name = "IDLETIMER",
.family = NFPROTO_UNSPEC,
.target = idletimer_tg_target,
@ -256,6 +463,20 @@ static struct xt_target idletimer_tg __read_mostly = {
.checkentry = idletimer_tg_checkentry,
.destroy = idletimer_tg_destroy,
.me = THIS_MODULE,
},
{
.name = "IDLETIMER",
.family = NFPROTO_UNSPEC,
.revision = 1,
.target = idletimer_tg_target_v1,
.targetsize = sizeof(struct idletimer_tg_info_v1),
.usersize = offsetof(struct idletimer_tg_info_v1, timer),
.checkentry = idletimer_tg_checkentry_v1,
.destroy = idletimer_tg_destroy_v1,
.me = THIS_MODULE,
},
};
static struct class *idletimer_tg_class;
@ -283,7 +504,8 @@ static int __init idletimer_tg_init(void)
idletimer_tg_kobj = &idletimer_tg_device->kobj;
err = xt_register_target(&idletimer_tg);
err = xt_register_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
if (err < 0) {
pr_debug("couldn't register xt target\n");
goto out_dev;
@ -300,7 +522,7 @@ out:
static void __exit idletimer_tg_exit(void)
{
xt_unregister_target(&idletimer_tg);
xt_unregister_targets(idletimer_tg, ARRAY_SIZE(idletimer_tg));
device_destroy(idletimer_tg_class, MKDEV(0, 0));
class_destroy(idletimer_tg_class);

View file

@ -21,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: packet security mark modification");
MODULE_ALIAS("ipt_SECMARK");
MODULE_ALIAS("ip6t_SECMARK");
#define PFX "SECMARK: "
static u8 mode;
static unsigned int

View file

@ -132,7 +132,7 @@ struct xt_hashlimit_htable {
const char *name;
struct net *net;
struct hlist_head hash[0]; /* hashtable itself */
struct hlist_head hash[]; /* hashtable itself */
};
static int

View file

@ -71,7 +71,7 @@ struct recent_entry {
u_int8_t ttl;
u_int8_t index;
u_int16_t nstamps;
unsigned long stamps[0];
unsigned long stamps[];
};
struct recent_table {
@ -82,7 +82,7 @@ struct recent_table {
unsigned int entries;
u8 nstamps_max_mask;
struct list_head lru_list;
struct list_head iphash[0];
struct list_head iphash[];
};
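The [0] to [] conversions in this series keep the allocation arithmetic identical; a userspace sketch of the pattern (struct name and sizes invented):

#include <stdio.h>
#include <stdlib.h>

struct table {
	unsigned int entries;
	unsigned long stamps[];	/* C99 flexible array member */
};

int main(void)
{
	unsigned int n = 4;
	struct table *t = malloc(sizeof(*t) + n * sizeof(t->stamps[0]));

	if (!t)
		return 1;
	t->entries = n;
	printf("allocated %zu bytes\n",
	       sizeof(*t) + n * sizeof(t->stamps[0]));
	free(t);
	return 0;
}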
struct recent_net {