From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 09:21:55 +0200 Subject: [PATCH 01/17] netfilter: pass hook ops to hookfn Pass the hook ops to the hookfn to allow for generic hook functions. This change is required by nf_tables. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 3 +- net/bridge/br_netfilter.c | 22 ++++++---- net/bridge/netfilter/ebtable_filter.c | 16 ++++--- net/bridge/netfilter/ebtable_nat.c | 16 ++++--- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 5 ++- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 7 ++-- net/ipv4/netfilter/iptable_mangle.c | 10 ++--- net/ipv4/netfilter/iptable_nat.c | 26 ++++++------ net/ipv4/netfilter/iptable_raw.c | 6 +-- net/ipv4/netfilter/iptable_security.c | 7 ++-- .../netfilter/nf_conntrack_l3proto_ipv4.c | 12 +++--- net/ipv4/netfilter/nf_defrag_ipv4.c | 6 ++- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_filter.c | 5 ++- net/ipv6/netfilter/ip6table_mangle.c | 10 ++--- net/ipv6/netfilter/ip6table_nat.c | 27 ++++++------ net/ipv6/netfilter/ip6table_raw.c | 5 ++- net/ipv6/netfilter/ip6table_security.c | 5 ++- .../netfilter/nf_conntrack_l3proto_ipv6.c | 14 ++++--- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 +-- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 42 +++++++++---------- security/selinux/hooks.c | 10 ++--- 26 files changed, 148 insertions(+), 122 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 61223c52414f..fef7e67f7101 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -42,7 +42,8 @@ int netfilter_init(void); struct sk_buff; -typedef unsigned int nf_hookfn(unsigned int hooknum, +struct nf_hook_ops; +typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct 
net_device *in, const struct net_device *out, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f87736270eaa..878f008afefa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -619,7 +619,7 @@ bad: /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. 
*/ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. 
*/ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 94b2b700cff8..bb2da7b706e7 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -60,17 +60,21 @@ static const struct ebt_table frame_filter = }; static unsigned int -ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_filter); } static unsigned int -ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 322555acdd40..bd238f1f105b 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -60,17 +60,21 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct 
sk_buff *)) +ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2a7efe388344..e83015cecfa7 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(unsigned int hook, +static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index a865f6f94013..802ddecb30b8 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(unsigned int hook, struct sk_buff *skb, +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? 
in : out); - return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); + return arpt_do_table(skb, ops->hooknum, in, out, + net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 0b732efd32e2..a2e2b61cd7da 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(unsigned int hook, +arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index b6346bf2fde3..01cffeaa0085 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(unsigned int hooknum, +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 50af5b45c050..e08a74a243a8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,20 +33,21 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(unsigned int hook, struct sk_buff *skb, +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? 
in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 0d8cd82e0fad..6a5079c34bb3 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(unsigned int hook, +iptable_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, hook, in, out, + return ipt_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 683bfaffed65..ee2886126e3d 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv4_fn(unsigned int hooknum, +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. 
*/ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) + ops->hooknum)) return NF_DROP; else return NF_ACCEPT; @@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -149,7 +149,7 @@ oif_changed: } static unsigned int -nf_nat_ipv4_in(unsigned int hooknum, +nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -167,7 +167,7 @@ 
nf_nat_ipv4_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_out(unsigned int hooknum, +nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv4_local_fn(unsigned int hooknum, +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 1f82aea11df6..b2f7e8f98316 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,20 +20,20 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(unsigned int hook, struct sk_buff *skb, +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? 
in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index f867a8d38bf7..c86647ed2078 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,21 +37,22 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(unsigned int hook, struct sk_buff *skb, +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net; - if (hook == NF_INET_LOCAL_OUT && + if (ops->hooknum == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; net = dev_net((in != NULL) ? 
in : out); - return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 86f5b34a4ed1..ecd8bec411c9 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(unsigned int hooknum, +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum, ct, ctinfo); } -static unsigned int ipv4_confirm(unsigned int hooknum, +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -147,16 +147,16 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(unsigned int hooknum, +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); } -static unsigned int ipv4_conntrack_local(unsigned int hooknum, +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); + return 
nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 742815518b0f..12e13bd82b5b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone; } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { - enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2748b042da72..bf9f612c1bc2 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5ea..ca7f6c128086 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. 
*/ static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6ab..307bbb782d14 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (hook == NF_INET_LOCAL_OUT) + if (ops->hooknum == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, out); - if (hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, hook, in, out, + if (ops->hooknum == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, hook, in, out, + return ip6t_do_table(skb, ops->hooknum, in, out, dev_net(in)->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7b..84c7f33d0cf8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, } static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct 
sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - hooknum, hdrlen)) + ops->hooknum, + hdrlen)) return NF_DROP; else return NF_ACCEPT; @@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } break; @@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +153,7 @@ oif_changed: } static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, 
okfn); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum, } static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720f..5274740acecc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? 
in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e525..ab3b0219ecfa 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { const struct net *net = dev_net((in != NULL) ? in : out); - return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); + return ip6t_do_table(skb, ops->hooknum, in, out, + net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 54b75ead5a69..486545eb42ce 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net, 
return nf_conntrack_in(net, PF_INET6, hooknum, skb); } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out, + okfn); } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out, + okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c5..ec483aa3f60f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - 
nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in, (struct net_device *)out, okfn); return NF_STOLEN; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 593b16ea45e0..1fbab0cdd302 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 74fd00c27210..34fda62f40f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } /* @@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. 
*/ static unsigned int -ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } /* @@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } #endif @@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } /* @@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int 
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, * Copy info from first fragment, to the rest of them. */ static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } /* @@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } #endif @@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. 
*/ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, ops->hooknum); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); } #endif diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 568c7699abf1..3f224d7795f5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_forward(unsigned int hooknum, +static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_forward(unsigned int hooknum, +static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff 
*skb, return NF_ACCEPT; } -static unsigned int selinux_ipv4_output(unsigned int hooknum, +static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, return NF_ACCEPT; } -static unsigned int selinux_ipv4_postroute(unsigned int hooknum, +static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static unsigned int selinux_ipv6_postroute(unsigned int hooknum, +static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, From f59cb0453cd885736daa11ae2445982c5ab2fc83 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 10:57:04 +0200 Subject: [PATCH 02/17] netfilter: nf_nat: move alloc_null_binding to nf_nat_core.c Similar to nat_decode_session, alloc_null_binding is needed for both ip_tables and nf_tables, so move it to nf_nat_core.c. This change is required by nf_tables. This is an adapted version of the original patch from Patrick McHardy. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat.h | 3 +++ net/netfilter/nf_nat_core.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c29b4e545f87..07eaaf604092 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -45,6 +45,9 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype); +extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, + unsigned int hooknum); + /* Is this tuple already taken? (not by us)*/ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6f0f4f7f68a5..63a815402211 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + * Use reply in case it's already been mangled (eg local packet). + */ + union nf_inet_addr ip = + (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); + struct nf_nat_range range = { + .flags = NF_NAT_RANGE_MAP_IPS, + .min_addr = ip, + .max_addr = ip, + }; + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} +EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); + /* Do packet manipulations according to nf_nat_setup_info. 
*/ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Oct 2013 11:00:02 +0200 Subject: [PATCH 03/17] netfilter: add nftables This patch adds nftables which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianness. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, eg. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support.
This set infrastructure is enhanced in the follow up patch (netfilter: nf_tables: add netlink set API). This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/linux/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes a skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain.
This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add 
missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 +- include/net/netfilter/nf_tables.h | 301 +++ include/net/netfilter/nf_tables_core.h | 25 + include/uapi/linux/netfilter/Kbuild | 1 + .../linux/netfilter/nf_conntrack_common.h | 4 + include/uapi/linux/netfilter/nf_tables.h | 582 ++++++ include/uapi/linux/netfilter/nfnetlink.h | 5 +- net/bridge/netfilter/Kconfig | 3 + net/bridge/netfilter/Makefile | 2 + net/bridge/netfilter/nf_tables_bridge.c | 37 + net/ipv4/netfilter/Kconfig | 16 + net/ipv4/netfilter/Makefile | 5 + net/ipv4/netfilter/nf_table_nat_ipv4.c | 409 ++++ net/ipv4/netfilter/nf_table_route_ipv4.c | 97 + net/ipv4/netfilter/nf_tables_ipv4.c | 59 + net/ipv4/netfilter/nft_reject_ipv4.c | 117 ++ net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/nf_table_route_ipv6.c | 93 + net/ipv6/netfilter/nf_tables_ipv6.c | 57 + net/netfilter/Kconfig | 37 + net/netfilter/Makefile | 16 + net/netfilter/nf_tables_api.c | 1760 +++++++++++++++++ net/netfilter/nf_tables_core.c | 152 ++ net/netfilter/nft_bitwise.c | 140 ++ net/netfilter/nft_byteorder.c | 167 ++ net/netfilter/nft_cmp.c | 146 ++ net/netfilter/nft_counter.c | 107 + net/netfilter/nft_ct.c | 252 +++ net/netfilter/nft_expr_template.c | 88 + net/netfilter/nft_exthdr.c | 127 ++ net/netfilter/nft_hash.c | 348 ++++ net/netfilter/nft_immediate.c | 113 ++ net/netfilter/nft_limit.c | 113 ++ 
net/netfilter/nft_log.c | 140 ++ net/netfilter/nft_meta.c | 222 +++ net/netfilter/nft_meta_target.c | 117 ++ net/netfilter/nft_payload.c | 137 ++ net/netfilter/nft_set.c | 381 ++++ 39 files changed, 6393 insertions(+), 6 deletions(-) create mode 100644 include/net/netfilter/nf_tables.h create mode 100644 include/net/netfilter/nf_tables_core.h create mode 100644 include/uapi/linux/netfilter/nf_tables.h create mode 100644 net/bridge/netfilter/nf_tables_bridge.c create mode 100644 net/ipv4/netfilter/nf_table_nat_ipv4.c create mode 100644 net/ipv4/netfilter/nf_table_route_ipv4.c create mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c create mode 100644 net/ipv6/netfilter/nf_table_route_ipv6.c create mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c create mode 100644 net/netfilter/nf_tables_api.c create mode 100644 net/netfilter/nf_tables_core.c create mode 100644 net/netfilter/nft_bitwise.c create mode 100644 net/netfilter/nft_byteorder.c create mode 100644 net/netfilter/nft_cmp.c create mode 100644 net/netfilter/nft_counter.c create mode 100644 net/netfilter/nft_ct.c create mode 100644 net/netfilter/nft_expr_template.c create mode 100644 net/netfilter/nft_exthdr.c create mode 100644 net/netfilter/nft_hash.c create mode 100644 net/netfilter/nft_immediate.c create mode 100644 net/netfilter/nft_limit.c create mode 100644 net/netfilter/nft_log.c create mode 100644 net/netfilter/nft_meta.c create mode 100644 net/netfilter/nft_meta_target.c create mode 100644 net/netfilter/nft_payload.c create mode 100644 net/netfilter/nft_set.c diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index fef7e67f7101..2077489f9887 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -53,12 +53,13 @@ struct nf_hook_ops { struct list_head list; /* User fills in from here down. 
*/ - nf_hookfn *hook; - struct module *owner; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h new file mode 100644 index 000000000000..d26dfa345f49 --- /dev/null +++ b/include/net/netfilter/nf_tables.h @@ -0,0 +1,301 @@ +#ifndef _NET_NF_TABLES_H +#define _NET_NF_TABLES_H + +#include +#include +#include +#include + +struct nft_pktinfo { + struct sk_buff *skb; + const struct net_device *in; + const struct net_device *out; + u8 hooknum; + u8 nhoff; + u8 thoff; +}; + +struct nft_data { + union { + u32 data[4]; + struct { + u32 verdict; + struct nft_chain *chain; + }; + }; +} __attribute__((aligned(__alignof__(u64)))); + +static inline int nft_data_cmp(const struct nft_data *d1, + const struct nft_data *d2, + unsigned int len) +{ + return memcmp(d1->data, d2->data, len); +} + +static inline void nft_data_copy(struct nft_data *dst, + const struct nft_data *src) +{ + BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64)); + *(u64 *)&dst->data[0] = *(u64 *)&src->data[0]; + *(u64 *)&dst->data[2] = *(u64 *)&src->data[2]; +} + +static inline void nft_data_debug(const struct nft_data *data) +{ + pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", + data->data[0], data->data[1], + data->data[2], data->data[3]); +} + +/** + * struct nft_ctx - nf_tables rule context + * + * @afi: address family info + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + */ +struct nft_ctx { + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; +}; + +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT, +}; + +struct nft_data_desc { + enum nft_data_types type; + unsigned int len; +}; + +extern int nft_data_init(const struct nft_ctx 
*ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); +extern void nft_data_uninit(const struct nft_data *data, + enum nft_data_types type); +extern int nft_data_dump(struct sk_buff *skb, int attr, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); + +static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) +{ + return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + +extern int nft_validate_input_register(enum nft_registers reg); +extern int nft_validate_output_register(enum nft_registers reg); +extern int nft_validate_data_load(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type); + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @list: used internally + * @name: Identifier + * @owner: module reference + * @policy: netlink attribute policy + * @maxattr: highest netlink attribute number + * @size: full expression size, including private data size + */ +struct nft_expr; +struct nft_expr_ops { + void (*eval)(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt); + int (*init)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + void (*destroy)(const struct nft_expr *expr); + int (*dump)(struct sk_buff *skb, + const struct nft_expr *expr); + + struct list_head list; + const char *name; + struct module *owner; + const struct nla_policy *policy; + unsigned int maxattr; + unsigned int size; +}; + +#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ + ALIGN(size, __alignof__(struct nft_expr))) + +/** + * struct nft_expr - nf_tables expression + * + * @ops: expression ops + * @data: expression private data + */ +struct nft_expr { + const struct 
nft_expr_ops *ops; + unsigned char data[]; +}; + +static inline void *nft_expr_priv(const struct nft_expr *expr) +{ + return (void *)expr->data; +} + +/** + * struct nft_rule - nf_tables rule + * + * @list: used internally + * @rcu_head: used internally for rcu + * @handle: rule handle + * @dlen: length of expression data + * @data: expression data + */ +struct nft_rule { + struct list_head list; + struct rcu_head rcu_head; + u64 handle:48, + dlen:16; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[0]; +} + +static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr) +{ + return ((void *)expr) + expr->ops->size; +} + +static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) +{ + return (struct nft_expr *)&rule->data[rule->dlen]; +} + +/* + * The last pointer isn't really necessary, but the compiler isn't able to + * determine that the result of nft_expr_last() is always the same since it + * can't assume that the dlen value wasn't changed within calls in the loop. 
+ */ +#define nft_rule_for_each_expr(expr, last, rule) \ + for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \ + (expr) != (last); \ + (expr) = nft_expr_next(expr)) + +enum nft_chain_flags { + NFT_BASE_CHAIN = 0x1, + NFT_CHAIN_BUILTIN = 0x2, +}; + +/** + * struct nft_chain - nf_tables chain + * + * @rules: list of rules in the chain + * @list: used internally + * @rcu_head: used internally + * @handle: chain handle + * @flags: bitmask of enum nft_chain_flags + * @use: number of jump references to this chain + * @level: length of longest path to this chain + * @name: name of the chain + */ +struct nft_chain { + struct list_head rules; + struct list_head list; + struct rcu_head rcu_head; + u64 handle; + u8 flags; + u16 use; + u16 level; + char name[NFT_CHAIN_MAXNAMELEN]; +}; + +/** + * struct nft_base_chain - nf_tables base chain + * + * @ops: netfilter hook ops + * @chain: the chain + */ +struct nft_base_chain { + struct nf_hook_ops ops; + struct nft_chain chain; +}; + +static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) +{ + return container_of(chain, struct nft_base_chain, chain); +} + +extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + +enum nft_table_flags { + NFT_TABLE_BUILTIN = 0x1, +}; + +/** + * struct nft_table - nf_tables table + * + * @list: used internally + * @chains: chains in the table + * @sets: sets in the table + * @hgenerator: handle generator state + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @name: name of the table + */ +struct nft_table { + struct list_head list; + struct list_head chains; + struct list_head sets; + u64 hgenerator; + u32 use; + u16 flags; + char name[]; +}; + +/** + * struct nft_af_info - nf_tables address family info + * + * @list: used internally + * @family: address family + * @nhooks: 
number of hooks in this family + * @owner: module owner + * @tables: used internally + * @hooks: hookfn overrides for packet validation + */ +struct nft_af_info { + struct list_head list; + int family; + unsigned int nhooks; + struct module *owner; + struct list_head tables; + nf_hookfn *hooks[NF_MAX_HOOKS]; +}; + +extern int nft_register_afinfo(struct nft_af_info *); +extern void nft_unregister_afinfo(struct nft_af_info *); + +extern int nft_register_table(struct nft_table *, int family); +extern void nft_unregister_table(struct nft_table *, int family); + +extern int nft_register_expr(struct nft_expr_ops *); +extern void nft_unregister_expr(struct nft_expr_ops *); + +#define MODULE_ALIAS_NFT_FAMILY(family) \ + MODULE_ALIAS("nft-afinfo-" __stringify(family)) + +#define MODULE_ALIAS_NFT_TABLE(family, name) \ + MODULE_ALIAS("nft-table-" __stringify(family) "-" name) + +#define MODULE_ALIAS_NFT_EXPR(name) \ + MODULE_ALIAS("nft-expr-" name) + +#endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h new file mode 100644 index 000000000000..283396c916e0 --- /dev/null +++ b/include/net/netfilter/nf_tables_core.h @@ -0,0 +1,25 @@ +#ifndef _NET_NF_TABLES_CORE_H +#define _NET_NF_TABLES_CORE_H + +extern int nf_tables_core_module_init(void); +extern void nf_tables_core_module_exit(void); + +extern int nft_immediate_module_init(void); +extern void nft_immediate_module_exit(void); + +extern int nft_cmp_module_init(void); +extern void nft_cmp_module_exit(void); + +extern int nft_lookup_module_init(void); +extern void nft_lookup_module_exit(void); + +extern int nft_bitwise_module_init(void); +extern void nft_bitwise_module_exit(void); + +extern int nft_byteorder_module_init(void); +extern void nft_byteorder_module_exit(void); + +extern int nft_payload_module_init(void); +extern void nft_payload_module_exit(void); + +#endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/uapi/linux/netfilter/Kbuild 
b/include/uapi/linux/netfilter/Kbuild index 174915420d3f..6ce0b7f566a7 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_tables.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 8dd803818ebe..319f47128db8 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -25,6 +25,10 @@ enum ip_conntrack_info { IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; +#define NF_CT_STATE_INVALID_BIT (1 << 0) +#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) + /* Bitset representing status of connection. */ enum ip_conntrack_status { /* It's an expected connection: bit 0 set. 
This bit never changed */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h new file mode 100644 index 000000000000..ec6d84a8ed1e --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -0,0 +1,582 @@ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_CHAIN_MAXNAMELEN 32 + +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_MAX, 
+}; + +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + 
__NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression performs the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * which allows expressing all bitwise operations: + * + * mask xor + * NOT: 1 1 + * OR: ~x x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + 
NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network operator + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +enum nft_set_elem_flags { + NFT_SE_INTERVAL_END = 0x1, +}; + +enum nft_set_elem_attributes { + 
NFTA_SE_UNSPEC, + NFTA_SE_KEY, + NFTA_SE_DATA, + NFTA_SE_FLAGS, + __NFTA_SE_MAX +}; +#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) + +enum nft_set_flags { + NFT_SET_INTERVAL = 0x1, + NFT_SET_MAP = 0x2, +}; + +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_FLAGS, + NFTA_SET_SREG, + NFTA_SET_DREG, + NFTA_SET_KLEN, + NFTA_SET_DLEN, + NFTA_SET_ELEMENTS, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +enum nft_hash_flags { + NFT_HASH_MAP = 0x1, +}; + +enum nft_hash_elem_attributes { + NFTA_HE_UNSPEC, + NFTA_HE_KEY, + NFTA_HE_DATA, + __NFTA_HE_MAX +}; +#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) + +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_FLAGS, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_KLEN, + NFTA_HASH_ELEMENTS, + __NFTA_HASH_MAX +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * 
@NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, +}; + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct 
expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, +}; + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + 
* @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, +}; + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MIN: source register of 
proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_ADDR_MIN, + NFTA_NAT_ADDR_MAX, + NFTA_NAT_PROTO_MIN, + NFTA_NAT_PROTO_MAX, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4a4efafad5f4..d276c3bd55b8 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -18,6 +18,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_DESTROY, #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) @@ -51,6 +53,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_COUNT 11 #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index a9aff9c7d027..68f8128147be 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -1,6 +1,9 @@ # # Bridge netfilter configuration # +# +config NF_TABLES_BRIDGE + tristate "Ethernet Bridge nf_tables support" menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 0718699540b0..ea7629f58b3d 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -2,6 +2,8 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. 
# +obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c new file mode 100644 index 000000000000..bc5c21c911c0 --- /dev/null +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include + +static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, + .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, +}; + +static int __init nf_tables_bridge_init(void) +{ + return nft_register_afinfo(&nft_af_bridge); +} + +static void __exit nf_tables_bridge_exit(void) +{ + nft_unregister_afinfo(&nft_af_bridge); +} + +module_init(nf_tables_bridge_init); +module_exit(nf_tables_bridge_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1657e39b291f..eb1d56ece361 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,6 +36,22 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. 
+config NF_TABLES_IPV4 + depends on NF_TABLES + tristate "IPv4 nf_tables support" + +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + tristate "nf_tables IPv4 reject support" + +config NF_TABLE_ROUTE_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables route table support" + +config NF_TABLE_NAT_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables nat table support" + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3622b248b6dd..b2f01cd2cd65 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -27,6 +27,11 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o +obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c new file mode 100644 index 000000000000..2a6f184c10bd --- /dev/null +++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return 
-EINVAL; + } + + if (tb[NFTA_NAT_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_nat_ops __read_mostly = { + .name = "nat", + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .owner = THIS_MODULE, + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, + .policy = nft_nat_policy, + 
.maxattr = NFTA_NAT_MAX, +}; + +/* + * NAT table + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct 
net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = { + .chain = { + .name = "PREROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_prerouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_prerouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = { + .chain = { + .name = 
"POSTROUTING", + .rules = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_postrouting, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_postrouting.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_output, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + .priv = &nf_chain_nat_output.chain, + }, +}; + +static struct nft_base_chain nf_chain_nat_input __read_mostly = { + .chain = { + .name = "INPUT", + .rules = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_nat_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + .priv = &nf_chain_nat_input.chain, + }, +}; + + +static struct nft_table nf_table_nat_ipv4 __read_mostly = { + .name = "nat", + .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains), +}; + +static int __init nf_table_nat_init(void) +{ + int err; + + list_add_tail(&nf_chain_nat_prerouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_postrouting.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_output.chain.list, + &nf_table_nat_ipv4.chains); + list_add_tail(&nf_chain_nat_input.chain.list, + &nf_table_nat_ipv4.chains); + + err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4); + if (err < 0) + goto err1; + + err = nft_register_expr(&nft_nat_ops); + if (err < 0) + goto err2; + + return 0; + +err2: + nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4); +err1: + return err; +} + +static void __exit nf_table_nat_exit(void) +{ + 
nft_unregister_expr(&nft_nat_ops); + nft_unregister_table(&nf_table_nat_ipv4, AF_INET); +} + +module_init(nf_table_nat_init); +module_exit(nf_table_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "nat"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nf_table_route_ipv4.c new file mode 100644 index 000000000000..4f257a1ed661 --- /dev/null +++ b/net/ipv4/netfilter/nf_table_route_ipv4.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. 
*/ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN) { /* same guard the nat hooks use before re-reading skb */ + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static struct nft_base_chain nf_chain_route_output __read_mostly = { + .chain = { + .name = "OUTPUT", + .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), + .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, + }, + .ops = { + .hook = nf_route_table_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_MANGLE, + .priv = &nf_chain_route_output.chain, + }, +}; + +static struct nft_table nf_table_route_ipv4 __read_mostly = { + .name = "route", + .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains), +}; + +static int __init nf_table_route_init(void) +{ + list_add_tail(&nf_chain_route_output.chain.list, + &nf_table_route_ipv4.chains); + return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +static void __exit nf_table_route_exit(void) +{ + nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4); +} + +module_init(nf_table_route_init); +module_exit(nf_table_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_TABLE(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 000000000000..63d0a3bf53d3 --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software 
Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain(ops, skb, in, out, okfn); +} + +static struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .hooks = { + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + }, +}; + +static int __init nf_tables_ipv4_init(void) +{ + return nft_register_afinfo(&nft_af_ipv4); +} + +static void __exit nf_tables_ipv4_exit(void) +{ + nft_unregister_afinfo(&nft_af_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000000..b4ee8d3bb1e4 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_reject { + enum nft_reject_types type:8; + u8 icmp_code; +}; + +static void nft_reject_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + break; + case NFT_REJECT_TCP_RST: + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { + [NFTA_REJECT_TYPE] = { .type = NLA_U32 }, + [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 }, +}; + +static int nft_reject_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) /* big-endian on the wire; init reads it back with ntohl() */ + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops reject_ops __read_mostly = { + .name = "reject", + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .owner = THIS_MODULE, + .eval = nft_reject_eval, + .init = 
nft_reject_init, + .dump = nft_reject_dump, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, +}; + +static int __init nft_reject_module_init(void) +{ + return nft_register_expr(&reject_ops); +} + +static void __exit nft_reject_module_exit(void) +{ + nft_unregister_expr(&reject_ops); +} + +module_init(nft_reject_module_init); +module_exit(nft_reject_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("reject"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b67..5677e38eeca3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,14 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_TABLES_IPV6 + depends on NF_TABLES + tristate "IPv6 nf_tables support" + +config NF_TABLE_ROUTE_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables route table support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798c..956af4492d10 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,10 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nf_table_route_ipv6.c new file mode 100644 index 000000000000..48ac65c7b398 --- /dev/null +++ b/net/ipv6/netfilter/nf_table_route_ipv6.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of 
the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/* Run the chain, then re-route if it changed any field saved beforehand. */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct in6_addr saddr, daddr;
+	u_int8_t hop_limit;
+	u32 mark, flowlabel;
+
+	/* save source/dest address, mark, hoplimit, flowlabel, priority */
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
+
+	/* flowlabel and prio (includes version, which shouldn't change either) */
+	flowlabel = *((u32 *)ipv6_hdr(skb));
+
+	ret = nft_do_chain(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_QUEUE &&
+	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+	     skb->mark != mark ||
+	     ipv6_hdr(skb)->hop_limit != hop_limit ||
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+	return ret;
+}
+/* Built-in "OUTPUT" base chain, hooked at LOCAL_OUT with mangle priority. */
+static struct nft_base_chain nf_chain_route_output __read_mostly = {
+	.chain	= {
+		.name		= "OUTPUT",
+		.rules		= LIST_HEAD_INIT(nf_chain_route_output.chain.rules),
+		.flags		= NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN,
+	},
+	.ops	= {
+		.hook		= nf_route_table_hook,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_MANGLE,
+		.priv		= &nf_chain_route_output.chain,
+	},
+};
+
+static struct nft_table nf_table_route_ipv6 __read_mostly = {
+	.name	= "route",
+	.chains	= LIST_HEAD_INIT(nf_table_route_ipv6.chains),
+};
+/* Link the built-in chain into the table, then register the table for IPv6. */
+static int __init nf_table_route_init(void)
+{
+	list_add_tail(&nf_chain_route_output.chain.list,
+		      &nf_table_route_ipv6.chains);
+	return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+static void __exit nf_table_route_exit(void)
+{
+	nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6);
+}
+
+module_init(nf_table_route_init);
+module_exit(nf_table_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy ");
+MODULE_ALIAS_NFT_TABLE(AF_INET6, "route");
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 000000000000..e0717cea4913
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include
+#include
+#include
+#include
+#include
+/* LOCAL_OUT hook: accept undersized raw-socket packets without running rules. */
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+				"packet\n");
+		return NF_ACCEPT;	/* shorter than a fixed IPv6 header */
+	}
+
+	return nft_do_chain(ops, skb, in, out, okfn);
+}
+/* IPv6 family descriptor; only NF_INET_LOCAL_OUT has a family-specific hook. */
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.nhooks		= NF_INET_NUMHOOKS,
+	.owner		= THIS_MODULE,
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nft_ipv6_output,
+	},
+};
+
+static int __init nf_tables_ipv6_init(void)
+{
+	return nft_register_afinfo(&nft_af_ipv6);
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+	nft_unregister_afinfo(&nft_af_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy ");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6dff2b..c271e1af93b5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_TABLES
+	depends on NETFILTER_NETLINK
+	tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	tristate "Netfilter nf_tables conntrack module"
+
+config NFT_SET
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables set module"
+
+config NFT_HASH
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables hash module"
+
+config NFT_COUNTER
+	depends on NF_TABLES
+	tristate "Netfilter nf_tables counter module"
+
+config
NFT_LOG + depends on NF_TABLES + tristate "Netfilter nf_tables log module" + +config NFT_LIMIT + depends on NF_TABLES + tristate "Netfilter nf_tables limit module" + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c3a0a12907f6..1ca3f3932826 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# nf_tables +nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o + +obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o +obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o +obj-$(CONFIG_NFT_LOG) += nft_log.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c new file mode 100644 index 000000000000..7d59c89c6c75 --- /dev/null +++ b/net/netfilter/nf_tables_api.c @@ -0,0 +1,1760 @@ +/* + * Copyright (c) 2007, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static LIST_HEAD(nf_tables_afinfo);		/* registered address families */
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ * nft_register_afinfo - register nf_tables address family info
+ *
+ * @afi: address family info to register
+ *
+ * Register the address family for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_afinfo(struct nft_af_info *afi)
+{
+	INIT_LIST_HEAD(&afi->tables);
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&afi->list, &nf_tables_afinfo);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ * nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ * @afi: address family info to unregister
+ *
+ * Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&afi->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+/* Return the registered afinfo for @family, or NULL if there is none. */
+static struct nft_af_info *nft_afinfo_lookup(int family)
+{
+	struct nft_af_info *afi;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (afi->family == family)
+			return afi;
+	}
+	return NULL;
+}
+/* Find @family; with @autoload, try a module load (-EAGAIN if it appeared). */
+static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload)
+{
+	struct nft_af_info *afi;
+
+	afi = nft_afinfo_lookup(family);
+	if (afi != NULL)
+		return afi;
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);	/* dropped across the load */
+		request_module("nft-afinfo-%u", family);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		afi = nft_afinfo_lookup(family);
+		if (afi != NULL)
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+					  const struct nlattr *nla)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!nla_strcmp(nla, table->name))
+			return table;
+	}
+	return NULL;
+}
+/* Find the table named by @nla (-EINVAL if absent); may autoload like above. */
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+						const struct nlattr *nla,
+						bool autoload)
+{
+	struct nft_table *table;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(afi, nla);
+	if (table != NULL)
+		return table;
+
+#ifdef CONFIG_MODULES
+	if (autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);	/* dropped across the load */
+		request_module("nft-table-%u-%.*s", afi->family,
+			       nla_len(nla), (const char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (nft_table_lookup(afi, nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+/* Allocate the next handle by pre-incrementing the per-table counter. */
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+	return ++table->hgenerator;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+};
+/* Append one table message to @skb; returns -1 after trimming on failure. */
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+/* Send a table event; @oskb and @nlh may be NULL for kernel-originated events. */
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+					family, table);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+/* Netlink dump callback; cb->args[0] holds the index to resume from. */
+static int nf_tables_dump_tables(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (idx > s_idx)
+				memset(&cb->args[1], 0,
+				       sizeof(cb->args) - sizeof(cb->args[0]));
+			if (nf_tables_fill_table_info(skb,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NFT_MSG_NEWTABLE,
+						      NLM_F_MULTI,
+						      afi->family, table) < 0)
+				goto done;
+cont:
+			idx++;
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+/* NFT_MSG_GETTABLE: start a dump, or unicast a single table to the requester. */
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_tables,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+					family, table);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+/* NFT_MSG_NEWTABLE: create the named table unless it already exists. */
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr *name;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	name = nla[NFTA_TABLE_NAME];
+	table = nf_tables_table_lookup(afi, name, false);
+	if (IS_ERR(table)) {
+		if (PTR_ERR(table) != -ENOENT)
+			return PTR_ERR(table);
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(table->name, name, nla_len(name));
+	INIT_LIST_HEAD(&table->chains);
+
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	return 0;
+}
+/* NFT_MSG_DELTABLE: refuse built-in tables (-EOPNOTSUPP) and in-use ones (-EBUSY). */
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->flags & NFT_TABLE_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (table->use)
+		return -EBUSY;
+
+	list_del(&table->list);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
+	kfree(table);
+	return 0;
+}
+/* Look a table up by C string; returns ERR_PTR(-ENOENT), not NULL, on a miss. */
+static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi,
+						  const char *name)
+{
+	struct nft_table *table;
+
+	list_for_each_entry(table, &afi->tables, list) {
+		if (!strcmp(name, table->name))
+			return table;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family);
+
+/**
+ * nft_register_table - register a built-in table
+ *
+ * @table: the table to register
+ * @family: protocol family to register table with
+ *
+ * Register a built-in table for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_table(struct nft_table *table, int family)
+{
+	struct nft_af_info *afi;
+	struct nft_table *t;
+	struct nft_chain *chain;
+	int err;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+again:
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi)) {
+		err = PTR_ERR(afi);
+		if (err == -EAGAIN)	/* family module was just loaded: retry */
+			goto again;
+		goto err;
+	}
+
+	t = __nf_tables_table_lookup(afi, table->name);
+	if (IS_ERR(t)) {
+		err = PTR_ERR(t);
+		if (err != -ENOENT)
+			goto err;
+		t = NULL;
+	}
+
+	if (t != NULL) {
+		err = -EEXIST;
+		goto err;
+	}
+
+	table->flags |= NFT_TABLE_BUILTIN;
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_NEWCHAIN, family);
+	err = 0;
+err:
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_table);
+
+/**
+ * nft_unregister_table - unregister a built-in table
+ *
+ * @table: the table to unregister
+ * @family: protocol family to unregister table with
+ *
+ * Unregister a built-in table for use with nf_tables.
+ */
+void nft_unregister_table(struct nft_table *table, int family)
+{
+	struct nft_chain *chain;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&table->list);
+	list_for_each_entry(chain, &table->chains, list)
+		nf_tables_chain_notify(NULL, NULL, table, chain,
+				       NFT_MSG_DELCHAIN, family);
+	nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_table);
+
+/*
+ * Chains
+ */
+
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (chain->handle == handle)
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+/* Find a chain by name attribute; -EINVAL if @nla is absent, -ENOENT on a miss. */
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+						const struct nlattr *nla)
+{
+	struct nft_chain *chain;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!nla_strcmp(nla, chain->name))
+			return chain;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING },
+	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
+	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
+	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
+	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+};
+/* Append one chain message to @skb; base chains also get a nested hook attr. */
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+				     int event, u32 flags, int family,
+				     const struct nft_table *table,
+				     const struct nft_chain *chain)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= family;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= 0;
+
+	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+		goto nla_put_failure;
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops;
+		struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+		if (nest == NULL)
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+			goto nla_put_failure;
+		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+			goto nla_put_failure;
+		nla_nest_end(skb, nest);
+	}
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+/* Send a chain event; @oskb and @nlh may be NULL for kernel-originated events. */
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
+					table, chain);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+/* Netlink dump callback; cb->args[0] holds the index to resume from. */
+static int nf_tables_dump_chains(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	unsigned int idx = 0, s_idx = cb->args[0];
+	int family = nfmsg->nfgen_family;
+
+	list_for_each_entry(afi, &nf_tables_afinfo, list) {
+		if (family != NFPROTO_UNSPEC && family != afi->family)
+			continue;
+
+		list_for_each_entry(table, &afi->tables, list) {
+			list_for_each_entry(chain, &table->chains, list) {
+				if (idx < s_idx)
+					goto cont;
+				if (idx > s_idx)
+					memset(&cb->args[1], 0,
+					       sizeof(cb->args) - sizeof(cb->args[0]));
+				if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NFT_MSG_NEWCHAIN,
+							      NLM_F_MULTI,
+							      afi->family, table, chain) < 0)
+					goto done;
+cont:
+				idx++;
+			}
+		}
+	}
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+/* NFT_MSG_GETCHAIN: start a dump, or unicast a single chain to the requester. */
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	struct sk_buff *skb2;
+	int family = nfmsg->nfgen_family;
+	int err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_chains,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		return -ENOMEM;
+
+	err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+					family, table, chain);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+/* NFT_MSG_NEWCHAIN: rename an existing chain (by handle) or create a new one. */
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_base_chain *basechain;
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	int family = nfmsg->nfgen_family;
+	u64 handle = 0;
+	int err;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (table->use == UINT_MAX)
+		return -EOVERFLOW;
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		chain = nf_tables_chain_lookup(table, name);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;	/* not found: fall through to creation */
+		}
+	}
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name &&
+		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+			return -EEXIST;	/* new name is already taken */
+
+		if (nla[NFTA_CHAIN_HANDLE] && name)
+			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+		goto notify;
+	}
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		struct nf_hook_ops *ops;
+
+		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+				       nft_hook_policy);
+		if (err < 0)
+			return err;
+		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+		    ha[NFTA_HOOK_PRIORITY] == NULL)
+			return -EINVAL;
+		if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks)
+			return -EINVAL;
+
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		if (basechain == NULL)
+			return -ENOMEM;
+		chain = &basechain->chain;
+
+		ops = &basechain->ops;
+		ops->pf		= family;
+		ops->owner	= afi->owner;
+		ops->hooknum	= ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		ops->priority	= ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+		ops->priv	= chain;
+		ops->hook	= nft_do_chain;
+		if (afi->hooks[ops->hooknum])	/* family-specific override */
+			ops->hook = afi->hooks[ops->hooknum];
+
+		chain->flags |= NFT_BASE_CHAIN;
+	} else {
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		if (chain == NULL)
+			return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&chain->rules);
+	chain->handle = nf_tables_alloc_handle(table);
+	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+	list_add_tail(&chain->list, &table->chains);
+	table->use++;
+notify:
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+			       family);
+	return 0;
+}
+/* RCU callback: free the chain, using the container type for base chains. */
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+
+	BUG_ON(chain->use > 0);
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		kfree(nft_base_chain(chain));
+	else
+		kfree(chain);
+}
+/* NFT_MSG_DELCHAIN: refuse built-in chains and chains that still have rules. */
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	if (chain->flags & NFT_CHAIN_BUILTIN)
+		return -EOPNOTSUPP;
+
+	if (!list_empty(&chain->rules))
+		return -EBUSY;
+
+	list_del(&chain->list);
+	table->use--;
+
+	if (chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+			       family);
+
+	/* Make sure all rule references are gone before this is released */
+	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	return 0;
+}
+
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct nft_af_info *afi,
+			 const struct nft_table *table,
+			 const struct nft_chain *chain)
+{
+	ctx->afi   = afi;
+	ctx->table = table;
+	ctx->chain = chain;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ * nft_register_expr - register nf_tables
expr operations
+ * @ops: expr operations
+ *
+ * Registers the expr operations for use with nf_tables. Returns zero on
+ * success or a negative errno code otherwise.
+ */
+int nft_register_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&ops->list, &nf_tables_expressions);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ * nft_unregister_expr - unregister nf_tables expr operations
+ * @ops: expr operations
+ *
+ * Unregisters the expr operations for use with nf_tables.
+ */
+void nft_unregister_expr(struct nft_expr_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&ops->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+/* Return the registered ops whose name matches @nla, or NULL. */
+static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	list_for_each_entry(ops, &nf_tables_expressions, list) {
+		if (!nla_strcmp(nla, ops->name))
+			return ops;
+	}
+	return NULL;
+}
+/* Get ops by name with a module reference; may autoload and return -EAGAIN. */
+static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla)
+{
+	const struct nft_expr_ops *ops;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	ops = __nft_expr_ops_get(nla);
+	if (ops != NULL && try_module_get(ops->owner))
+		return ops;
+
+#ifdef CONFIG_MODULES
+	if (ops == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);	/* dropped across the load */
+		request_module("nft-expr-%.*s",
+			       nla_len(nla), (char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_expr_ops_get(nla))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	return ERR_PTR(-ENOENT);
+}
+
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
+	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
+};
+/* Emit an expression's name and, if it has a dump op, its nested data. */
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+				    const struct nft_expr *expr)
+{
+	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name))
+		goto nla_put_failure;
+
+	if (expr->ops->dump) {
+		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		if (data == NULL)
+			goto nla_put_failure;
+		if (expr->ops->dump(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, data);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+/* Result of parsing one expression attribute: its ops and top-level attrs. */
+struct nft_expr_info {
+	const struct nft_expr_ops	*ops;
+	struct nlattr			*tb[NFTA_EXPR_MAX + 1];
+};
+/* Parse @nla into @info and resolve the ops named by NFTA_EXPR_NAME. */
+static int nf_tables_expr_parse(const struct nlattr *nla,
+				struct nft_expr_info *info)
+{
+	const struct nft_expr_ops *ops;
+	int err;
+
+	err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+	if (err < 0)
+		return err;
+
+	ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	info->ops = ops;
+	return 0;
+}
+/* Bind @expr to info->ops and run its init op; info->ops is cleared on success. */
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+			     struct nft_expr_info *info,
+			     struct nft_expr *expr)
+{
+	const struct nft_expr_ops *ops = info->ops;
+	int err;
+
+	expr->ops = ops;
+	if (ops->init) {
+		struct nlattr *ma[ops->maxattr + 1];
+
+		if (info->tb[NFTA_EXPR_DATA]) {
+			err = nla_parse_nested(ma, ops->maxattr,
+					       info->tb[NFTA_EXPR_DATA],
+					       ops->policy);
+			if (err < 0)
+				goto err1;
+		} else
+			memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1));
+
+		err = ops->init(ctx, expr, (const struct nlattr **)ma);
+		if (err < 0)
+			goto err1;
+	}
+
+	info->ops = NULL;
+	return 0;
+
+err1:
+	expr->ops = NULL;
+	return err;
+}
+/* Run the optional destroy op and drop the owning module's reference. */
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+	if (expr->ops->destroy)
+		expr->ops->destroy(expr);
+	module_put(expr->ops->owner);
+}
+
+/*
+ * Rules
+ */
+
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+						u64 handle)
+{
+	struct nft_rule *rule;
+
+	/* FIXME: linear search over every rule in the chain */
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (handle == rule->handle)
+			return rule;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+/* Find a rule by its big-endian 64-bit handle attribute; -EINVAL if absent. */
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+					      const struct nlattr *nla)
+{
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+}
+
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { + [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + const struct nft_expr *expr, *next; + struct nlattr *list; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + if (list == NULL) + goto nla_put_failure; + nft_rule_for_each_expr(expr, next, rule) { + struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); + if (elem == NULL) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, elem); + } + nla_nest_end(skb, list); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_rule_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule, + int event, u32 flags, int family) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(oskb).portid; + struct net *net = oskb ? 
sock_net(oskb->sk) : &init_net; + u32 seq = nlh->nlmsg_seq; + bool report; + int err; + + report = nlmsg_report(nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, + family, table, chain, rule); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry(rule, &chain->rules, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, chain, rule) < 0) + goto done; +cont: + idx++; + } + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + struct sk_buff *skb2; + 
int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_rules, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, + family, table, chain, rule); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +{ + struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); + struct nft_expr *expr; + + /* + * Careful: some expressions might not be initialized in case this + * is called on error from nf_tables_newrule(). 
+ */ + expr = nft_expr_first(rule); + while (expr->ops && expr != nft_expr_last(rule)) { + nf_tables_expr_destroy(expr); + expr = nft_expr_next(expr); + } + kfree(rule); +} + +static void nf_tables_rule_destroy(struct nft_rule *rule) +{ + call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); +} + +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + +static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; + struct nft_expr *expr; + struct nft_ctx ctx; + struct nlattr *tmp; + unsigned int size, i, n; + int err, rem; + bool create; + u64 handle; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); + rule = __nf_tables_rule_lookup(chain, handle); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + old_rule = rule; + else + return -EOPNOTSUPP; + } else { + if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) + return -EINVAL; + handle = nf_tables_alloc_handle(table); + } + + n = 0; + size = 0; + if (nla[NFTA_RULE_EXPRESSIONS]) { + nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { + err = -EINVAL; + if (nla_type(tmp) != NFTA_LIST_ELEM) + goto err1; + if (n == NFT_RULE_MAXEXPRS) + goto err1; + err = nf_tables_expr_parse(tmp, &info[n]); + if (err < 0) + goto err1; + size += 
info[n].ops->size; + n++; + } + } + + err = -ENOMEM; + rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + if (rule == NULL) + goto err1; + + rule->handle = handle; + rule->dlen = size; + + nft_ctx_init(&ctx, afi, table, chain); + expr = nft_expr_first(rule); + for (i = 0; i < n; i++) { + err = nf_tables_newexpr(&ctx, &info[i], expr); + if (err < 0) + goto err2; + expr = nft_expr_next(expr); + } + + /* Register hook when first rule is inserted into a base chain */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + list_replace_rcu(&old_rule->list, &rule->list); + nf_tables_rule_destroy(old_rule); + } else if (nlh->nlmsg_flags & NLM_F_APPEND) + list_add_tail_rcu(&rule->list, &chain->rules); + else + list_add_rcu(&rule->list, &chain->rules); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, + nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nfmsg->nfgen_family); + return 0; + +err2: + nf_tables_rule_destroy(rule); +err1: + for (i = 0; i < n; i++) { + if (info[i].ops != NULL) + module_put(info[i].ops->owner); + } + return err; +} + +static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *tmp; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if 
(IS_ERR(rule)) + return PTR_ERR(rule); + + /* List removal must be visible before destroying expressions */ + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } else { + /* Remove all rules in this chain */ + list_for_each_entry_safe(rule, tmp, &chain->rules, list) { + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } + } + + /* Unregister hook when last rule from base chain is deleted */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + [NFT_MSG_NEWTABLE] = { + .call = nf_tables_newtable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_GETTABLE] = { + .call = nf_tables_gettable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_DELTABLE] = { + .call = nf_tables_deltable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_NEWCHAIN] = { + .call = nf_tables_newchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_GETCHAIN] = { + .call = nf_tables_getchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_DELCHAIN] = { + .call = nf_tables_delchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_NEWRULE] = { + .call = nf_tables_newrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_GETRULE] = { + .call = nf_tables_getrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_DELRULE] = { + .call = nf_tables_delrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, +}; + +static const struct nfnetlink_subsystem nf_tables_subsys = { + .name = "nf_tables", + .subsys_id = 
NFNL_SUBSYS_NFTABLES, + .cb_count = NFT_MSG_MAX, + .cb = nf_tables_cb, +}; + +/** + * nft_validate_input_register - validate an expressions' input register + * + * @reg: the register number + * + * Validate that the input register is one of the general purpose + * registers. + */ +int nft_validate_input_register(enum nft_registers reg) +{ + if (reg <= NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_input_register); + +/** + * nft_validate_output_register - validate an expressions' output register + * + * @reg: the register number + * + * Validate that the output register is one of the general purpose + * registers or the verdict register. + */ +int nft_validate_output_register(enum nft_registers reg) +{ + if (reg < NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_output_register); + +/** + * nft_validate_data_load - validate an expressions' data load + * + * @ctx: context of the expression performing the load + * @reg: the destination register number + * @data: the data to load + * @type: the data type + * + * Validate that a data load uses the appropriate data type for + * the destination register. A value of NULL for the data means + * that its runtime gathered data, which is always of type + * NFT_DATA_VALUE. 
+ */ +int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type) +{ + switch (reg) { + case NFT_REG_VERDICT: + if (data == NULL || type != NFT_DATA_VERDICT) + return -EINVAL; + // FIXME: do loop detection + return 0; + default: + if (data != NULL && type != NFT_DATA_VALUE) + return -EINVAL; + return 0; + } +} +EXPORT_SYMBOL_GPL(nft_validate_data_load); + +static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { + [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, + [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, +}; + +static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_VERDICT_MAX + 1]; + struct nft_chain *chain; + int err; + + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + if (err < 0) + return err; + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; + data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: + desc->len = sizeof(data->verdict); + break; + case NFT_JUMP: + case NFT_GOTO: + if (!tb[NFTA_VERDICT_CHAIN]) + return -EINVAL; + chain = nf_tables_chain_lookup(ctx->table, + tb[NFTA_VERDICT_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + if (chain->flags & NFT_BASE_CHAIN) + return -EOPNOTSUPP; + + if (ctx->chain->level + 1 > chain->level) { + if (ctx->chain->level + 1 == 16) + return -EMLINK; + chain->level = ctx->chain->level + 1; + } + chain->use++; + data->chain = chain; + desc->len = sizeof(data); + break; + default: + return -EINVAL; + } + + desc->type = NFT_DATA_VERDICT; + return 0; +} + +static void nft_verdict_uninit(const struct nft_data *data) +{ + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + data->chain->use--; + break; + } +} + 
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + if (!nest) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + goto nla_put_failure; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + unsigned int len; + + len = nla_len(nla); + if (len == 0) + return -EINVAL; + if (len > sizeof(data->data)) + return -EOVERFLOW; + + nla_memcpy(data->data, nla, sizeof(data->data)); + desc->type = NFT_DATA_VALUE; + desc->len = len; + return 0; +} + +static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, + unsigned int len) +{ + return nla_put(skb, NFTA_DATA_VALUE, len, data->data); +} + +static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { + [NFTA_DATA_VALUE] = { .type = NLA_BINARY, + .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, +}; + +/** + * nft_data_init - parse nf_tables data netlink attributes + * + * @ctx: context of the expression using the data + * @data: destination struct nft_data + * @desc: data description + * @nla: netlink attribute containing data + * + * Parse the netlink data attributes and initialize a struct nft_data. + * The type and length of data are returned in the data description. + * + * The caller can indicate that it only wants to accept data of type + * NFT_DATA_VALUE by passing NULL for the ctx argument. 
+ */ +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_DATA_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + if (err < 0) + return err; + + if (tb[NFTA_DATA_VALUE]) + return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + if (tb[NFTA_DATA_VERDICT] && ctx != NULL) + return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(nft_data_init); + +/** + * nft_data_uninit - release a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * all others need to be released by calling this function. + */ +void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +{ + switch (type) { + case NFT_DATA_VALUE: + return; + case NFT_DATA_VERDICT: + return nft_verdict_uninit(data); + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL_GPL(nft_data_uninit); + +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(skb, attr); + if (nest == NULL) + return -1; + + switch (type) { + case NFT_DATA_VALUE: + err = nft_value_dump(skb, data, len); + break; + case NFT_DATA_VERDICT: + err = nft_verdict_dump(skb, data); + break; + default: + err = -EINVAL; + WARN_ON(1); + } + + nla_nest_end(skb, nest); + return err; +} +EXPORT_SYMBOL_GPL(nft_data_dump); + +static int __init nf_tables_module_init(void) +{ + int err; + + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, + GFP_KERNEL); + if (info == NULL) { + err = -ENOMEM; + goto err1; + } + + err = nf_tables_core_module_init(); + if (err < 0) + goto err2; + + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err3; + + pr_info("nf_tables: (c) 2007-2009 Patrick 
McHardy \n"); + return 0; +err3: + nf_tables_core_module_exit(); +err2: + kfree(info); +err1: + return err; +} + +static void __exit nf_tables_module_exit(void) +{ + nfnetlink_subsys_unregister(&nf_tables_subsys); + nf_tables_core_module_exit(); + kfree(info); +} + +module_init(nf_tables_module_init); +module_exit(nf_tables_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c new file mode 100644 index 000000000000..bc7fb85d4002 --- /dev/null +++ b/net/netfilter/nf_tables_core.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_JUMP_STACK_SIZE 16 + +unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nft_chain *chain = ops->priv; + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + struct nft_data data[NFT_REG_MAX + 1]; + const struct nft_pktinfo pkt = { + .skb = skb, + .in = in, + .out = out, + .hooknum = ops->hooknum, + }; + unsigned int stackptr = 0; + struct { + const struct nft_chain *chain; + const struct nft_rule *rule; + } jumpstack[NFT_JUMP_STACK_SIZE]; + +do_chain: + rule = list_entry(&chain->rules, struct nft_rule, list); +next_rule: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + expr->ops->eval(expr, data, &pkt); + if 
(data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NFT_BREAK: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + /* fall through */ + case NFT_CONTINUE: + continue; + } + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + return data[NFT_REG_VERDICT].verdict; + case NFT_JUMP: + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + jumpstack[stackptr].chain = chain; + jumpstack[stackptr].rule = rule; + stackptr++; + /* fall through */ + case NFT_GOTO: + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; + case NFT_RETURN: + case NFT_CONTINUE: + break; + default: + WARN_ON(1); + } + + if (stackptr > 0) { + stackptr--; + chain = jumpstack[stackptr].chain; + rule = jumpstack[stackptr].rule; + goto next_rule; + } + + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nft_do_chain); + +int __init nf_tables_core_module_init(void) +{ + int err; + + err = nft_immediate_module_init(); + if (err < 0) + goto err1; + + err = nft_cmp_module_init(); + if (err < 0) + goto err2; + + err = nft_lookup_module_init(); + if (err < 0) + goto err3; + + err = nft_bitwise_module_init(); + if (err < 0) + goto err4; + + err = nft_byteorder_module_init(); + if (err < 0) + goto err5; + + err = nft_payload_module_init(); + if (err < 0) + goto err6; + + return 0; + +err6: + nft_byteorder_module_exit(); +err5: + nft_bitwise_module_exit(); +err4: + nft_lookup_module_exit(); +err3: + nft_cmp_module_exit(); +err2: + nft_immediate_module_exit(); +err1: + return err; +} + +void nf_tables_core_module_exit(void) +{ + nft_payload_module_exit(); + nft_byteorder_module_exit(); + nft_bitwise_module_exit(); + nft_lookup_module_exit(); + nft_cmp_module_exit(); + nft_immediate_module_exit(); +} diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c new file mode 100644 index 000000000000..0f7501506367 --- /dev/null +++ b/net/netfilter/nft_bitwise.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 
Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_bitwise { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + struct nft_data mask; + struct nft_data xor; +}; + +static void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_data *src = &data[priv->sreg]; + struct nft_data *dst = &data[priv->dreg]; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { + dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ + priv->xor.data[i]; + } +} + +static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { + [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, + [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, + [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, +}; + +static int nft_bitwise_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_data_desc d1, d2; + int err; + + if (tb[NFTA_BITWISE_SREG] == NULL || + tb[NFTA_BITWISE_DREG] == NULL || + tb[NFTA_BITWISE_LEN] == NULL || + tb[NFTA_BITWISE_MASK] == NULL || + tb[NFTA_BITWISE_XOR] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = 
nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + + err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + if (err < 0) + return err; + if (d1.len != priv->len) + return -EINVAL; + + err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + if (err < 0) + return err; + if (d2.len != priv->len) + return -EINVAL; + + return 0; +} + +static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_bitwise_ops __read_mostly = { + .name = "bitwise", + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), + .owner = THIS_MODULE, + .eval = nft_bitwise_eval, + .init = nft_bitwise_init, + .dump = nft_bitwise_dump, + .policy = nft_bitwise_policy, + .maxattr = NFTA_BITWISE_MAX, +}; + +int __init nft_bitwise_module_init(void) +{ + return nft_register_expr(&nft_bitwise_ops); +} + +void nft_bitwise_module_exit(void) +{ + nft_unregister_expr(&nft_bitwise_ops); +} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c new file mode 100644 index 000000000000..8b0657a4d17b --- /dev/null +++ b/net/netfilter/nft_byteorder.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_byteorder { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + enum nft_byteorder_ops op:8; + u8 len; + u8 size; +}; + +static void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + union { u32 u32; u16 u16; } *s, *d; + unsigned int i; + + s = (void *)src->data; + d = (void *)dst->data; + + switch (priv->size) { + case 4: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = ntohl((__force __be32)s[i].u32); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = (__force __u32)htonl(s[i].u32); + break; + } + break; + case 2: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = ntohs((__force __be16)s[i].u16); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = (__force __u16)htons(s[i].u16); + break; + } + break; + } +} + +static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { + [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, +}; + +static int nft_byteorder_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_BYTEORDER_SREG] == NULL || + tb[NFTA_BYTEORDER_DREG] == NULL || + tb[NFTA_BYTEORDER_LEN] == NULL || + 
tb[NFTA_BYTEORDER_SIZE] == NULL || + tb[NFTA_BYTEORDER_OP] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + case NFT_BYTEORDER_HTON: + break; + default: + return -EINVAL; + } + + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + switch (priv->size) { + case 2: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_byteorder_ops __read_mostly = { + .name = "byteorder", + .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), + .owner = THIS_MODULE, + .eval = nft_byteorder_eval, + .init = nft_byteorder_init, + .dump = nft_byteorder_dump, + .policy = nft_byteorder_policy, + .maxattr = NFTA_BYTEORDER_MAX, +}; + +int __init nft_byteorder_module_init(void) +{ + return 
nft_register_expr(&nft_byteorder_ops); +} + +void nft_byteorder_module_exit(void) +{ + nft_unregister_expr(&nft_byteorder_ops); +} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c new file mode 100644 index 000000000000..e734d670120a --- /dev/null +++ b/net/netfilter/nft_cmp.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_cmp_expr { + struct nft_data data; + enum nft_registers sreg:8; + u8 len; + enum nft_cmp_ops op:8; +}; + +static void nft_cmp_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + int d; + + d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + switch (priv->op) { + case NFT_CMP_EQ: + if (d != 0) + goto mismatch; + break; + case NFT_CMP_NEQ: + if (d == 0) + goto mismatch; + break; + case NFT_CMP_LT: + if (d == 0) + goto mismatch; + case NFT_CMP_LTE: + if (d > 0) + goto mismatch; + break; + case NFT_CMP_GT: + if (d == 0) + goto mismatch; + case NFT_CMP_GTE: + if (d < 0) + goto mismatch; + break; + } + return; + +mismatch: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { + [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_OP] = { .type = NLA_U32 }, + [NFTA_CMP_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == 
NULL || + tb[NFTA_CMP_DATA] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (priv->op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return -EINVAL; + } + + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return err; + + priv->len = desc.len; + return 0; +} + +static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_cmp_ops __read_mostly = { + .name = "cmp", + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), + .owner = THIS_MODULE, + .eval = nft_cmp_eval, + .init = nft_cmp_init, + .dump = nft_cmp_dump, + .policy = nft_cmp_policy, + .maxattr = NFTA_CMP_MAX, +}; + +int __init nft_cmp_module_init(void) +{ + return nft_register_expr(&nft_cmp_ops); +} + +void nft_cmp_module_exit(void) +{ + nft_unregister_expr(&nft_cmp_ops); +} diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c new file mode 100644 index 000000000000..33c5d36819bb --- /dev/null +++ b/net/netfilter/nft_counter.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_counter { + seqlock_t lock; + u64 bytes; + u64 packets; +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + write_seqlock_bh(&priv->lock); + priv->bytes += pkt->skb->len; + priv->packets++; + write_sequnlock_bh(&priv->lock); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter *priv = nft_expr_priv(expr); + unsigned int seq; + u64 bytes; + u64 packets; + + do { + seq = read_seqbegin(&priv->lock); + bytes = priv->bytes; + packets = priv->packets; + } while (read_seqretry(&priv->lock, seq)); + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int nft_counter_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + if (tb[NFTA_COUNTER_PACKETS]) + priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + if (tb[NFTA_COUNTER_BYTES]) + priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + + seqlock_init(&priv->lock); + return 0; +} + +static struct nft_expr_ops nft_counter_ops __read_mostly = { + .name = "counter", + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, + .eval = nft_counter_eval, + .init = nft_counter_init, + .dump = 
nft_counter_dump, +}; + +static int __init nft_counter_module_init(void) +{ + return nft_register_expr(&nft_counter_ops); +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_ops); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("counter"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c new file mode 100644 index 000000000000..a1756d678226 --- /dev/null +++ b/net/netfilter/nft_ct.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + enum nft_registers dreg:8; + uint8_t family; +}; + +static void nft_ct_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + const struct nf_conn_help *help; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_helper *helper; + long diff; + unsigned int state; + + ct = nf_ct_get(pkt->skb, &ctinfo); + + switch (priv->key) { + case NFT_CT_STATE: + if (ct == NULL) + state = NF_CT_STATE_INVALID_BIT; + else if (nf_ct_is_untracked(ct)) + state = NF_CT_STATE_UNTRACKED_BIT; + else + state = NF_CT_STATE_BIT(ctinfo); + dest->data[0] = state; + return; + } + + if (ct == NULL) + goto err; + + switch (priv->key) { + case NFT_CT_DIRECTION: + dest->data[0] = CTINFO2DIR(ctinfo); + return; + case 
NFT_CT_STATUS: + dest->data[0] = ct->status; + return; +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + dest->data[0] = ct->mark; + return; +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: + dest->data[0] = ct->secmark; + return; +#endif + case NFT_CT_EXPIRATION: + /* remaining lifetime: expiry time minus now, clamped at zero */ + diff = (long)ct->timeout.expires - (long)jiffies; + if (diff < 0) + diff = 0; + dest->data[0] = jiffies_to_msecs(diff); + return; + case NFT_CT_HELPER: + if (ct->master == NULL) + goto err; + help = nfct_help(ct->master); + if (help == NULL) + goto err; + helper = rcu_dereference(help->helper); + if (helper == NULL) + goto err; + if (strlen(helper->name) >= sizeof(dest->data)) + goto err; + strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + return; + } + + tuple = &ct->tuplehash[priv->dir].tuple; + switch (priv->key) { + case NFT_CT_L3PROTOCOL: + dest->data[0] = nf_ct_l3num(ct); + return; + case NFT_CT_SRC: + memcpy(dest->data, tuple->src.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_DST: + memcpy(dest->data, tuple->dst.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ?
4 : 16); + return; + case NFT_CT_PROTOCOL: + dest->data[0] = nf_ct_protonum(ct); + return; + case NFT_CT_PROTO_SRC: + dest->data[0] = (__force __u16)tuple->src.u.all; + return; + case NFT_CT_PROTO_DST: + dest->data[0] = (__force __u16)tuple->dst.u.all; + return; + } + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { + [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, +}; + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_CT_DREG] == NULL || + tb[NFTA_CT_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + + switch (priv->key) { + case NFT_CT_STATE: + case NFT_CT_DIRECTION: + case NFT_CT_STATUS: +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: +#endif + case NFT_CT_EXPIRATION: + case NFT_CT_HELPER: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + break; + /* keys handled after the per-direction tuple lookup in nft_ct_eval() */ + case NFT_CT_L3PROTOCOL: + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + err = nf_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + priv->family = ctx->afi->family; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + goto err1; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + goto err1; + return 0; + +err1: +
nf_ct_l3proto_module_put(ctx->afi->family); + return err; +} + +static void nft_ct_destroy(const struct nft_expr *expr) +{ + struct nft_ct *priv = nft_expr_priv(expr); + + nf_ct_l3proto_module_put(priv->family); +} + +static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_ct_ops __read_mostly = { + .name = "ct", + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .owner = THIS_MODULE, + .eval = nft_ct_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_dump, + .policy = nft_ct_policy, + .maxattr = NFTA_CT_MAX, +}; + +static int __init nft_ct_module_init(void) +{ + return nft_register_expr(&nft_ct_ops); +} + +static void __exit nft_ct_module_exit(void) +{ + nft_unregister_expr(&nft_ct_ops); +} + +module_init(nft_ct_module_init); +module_exit(nft_ct_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("ct"); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c new file mode 100644 index 000000000000..9fc8eb308193 --- /dev/null +++ b/net/netfilter/nft_expr_template.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +struct nft_template { + +}; + +static void nft_template_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { + [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, +}; + +static int nft_template_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *tb[]) +{ + struct nft_template *priv = nft_expr_priv(expr); + + return 0; +} + +static void nft_template_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_template *priv = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops template_ops __read_mostly = { + .name = "template", + .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), + .owner = THIS_MODULE, + .eval = nft_template_eval, + .init = nft_template_init, + .destroy = nft_template_destroy, + .dump = nft_template_dump, + .policy = nft_template_policy, + .maxattr = NFTA_TEMPLATE_MAX, +}; + +static int __init nft_template_module_init(void) +{ + return nft_register_expr(&template_ops); +} + +static void __exit nft_template_module_exit(void) +{ + nft_unregister_expr(&template_ops); +} + +module_init(nft_template_module_init); +module_exit(nft_template_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c new file mode 100644 index 000000000000..21c6a6b7b662 --- /dev/null +++ b/net/netfilter/nft_exthdr.c @@ -0,0 
+1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +// FIXME: +#include + +struct nft_exthdr { + u8 type; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_exthdr_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + unsigned int offset; + int err; + + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); + if (err < 0) + goto err; + offset += priv->offset; + + if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { + [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, + [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, +}; + +static int nft_exthdr_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_EXTHDR_DREG] == NULL || + tb[NFTA_EXTHDR_TYPE] == NULL || + tb[NFTA_EXTHDR_OFFSET] == NULL || + tb[NFTA_EXTHDR_LEN] == NULL) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); + err = 
nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops exthdr_ops __read_mostly = { + .name = "exthdr", + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .owner = THIS_MODULE, + .eval = nft_exthdr_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, + .policy = nft_exthdr_policy, + .maxattr = NFTA_EXTHDR_MAX, +}; + +static int __init nft_exthdr_module_init(void) +{ + return nft_register_expr(&exthdr_ops); +} + +static void __exit nft_exthdr_module_exit(void) +{ + nft_unregister_expr(&exthdr_ops); +} + +module_init(nft_exthdr_module_init); +module_exit(nft_exthdr_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("exthdr"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..67cc502881f1 --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_hash { + struct hlist_head *hash; + unsigned int hsize; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_hash_elem { + struct hlist_node hnode; + struct nft_data key; + struct nft_data data[]; +}; + +static u32 nft_hash_rnd __read_mostly; +static bool nft_hash_rnd_initted __read_mostly; + +static unsigned int nft_hash_data(const struct nft_data *data, + unsigned int hsize, unsigned int len) +{ + unsigned int h; + + // FIXME: can we reasonably guarantee the upper bits are fixed? + h = jhash2(data->data, len >> 2, nft_hash_rnd); + return ((u64)h * hsize) >> 32; +} + +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + const struct nft_data *key = &data[priv->sreg]; + unsigned int h; + + h = nft_hash_data(key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, key, priv->klen)) + continue; + if (priv->flags & NFT_HASH_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_hash_elem_destroy(const struct nft_expr *expr, + struct nft_hash_elem *elem) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_HASH_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { + [NFTA_HE_KEY] = { .type = NLA_NESTED }, + [NFTA_HE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_hash_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const 
struct nlattr *nla, + struct nft_hash_elem **new) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_HE_MAX + 1]; + struct nft_hash_elem *elem; + struct nft_data_desc d1, d2; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); + if (err < 0) + return err; + + if (tb[NFTA_HE_KEY] == NULL) + return -EINVAL; + size = sizeof(*elem); + + if (priv->flags & NFT_HASH_MAP) { + if (tb[NFTA_HE_DATA] == NULL) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_HE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_HE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); + if (err < 0) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_hash_elem *elem) + +{ + const struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_HASH_MAP) { + if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, + NFT_DATA_VALUE, priv->dlen) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = 
nft_expr_priv(expr); + const struct hlist_node *next; + struct nft_hash_elem *elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(expr, elem); + } + } + kfree(priv->hash); +} + +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_KLEN] = { .type = NLA_U32 }, + [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nft_hash_elem *elem, *uninitialized_var(new); + const struct nlattr *nla; + unsigned int cnt, i; + unsigned int h; + int err, rem; + + if (unlikely(!nft_hash_rnd_initted)) { + get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd_initted = true; + } + + if (tb[NFTA_HASH_SREG] == NULL || + tb[NFTA_HASH_KLEN] == NULL || + tb[NFTA_HASH_ELEMENTS] == NULL) + return -EINVAL; + + if (tb[NFTA_HASH_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); + if (priv->flags & ~NFT_HASH_MAP) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_HASH_DREG] != NULL) { + if (!(priv->flags & NFT_HASH_MAP)) + return -EINVAL; + priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); + if (priv->klen == 0) + return -EINVAL; + + cnt = 0; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + if (nla_type(nla) != NFTA_LIST_ELEM) + return -EINVAL; + cnt++; + } + + /* Aim for a load factor of 0.75 */ + cnt = cnt * 4 / 3; + /* An empty element list still needs one bucket: eval indexes hash[0] */ + if (cnt == 0) + cnt = 1; + + priv->hash = kcalloc(cnt,
sizeof(struct hlist_head), GFP_KERNEL); + if (priv->hash == NULL) + return -ENOMEM; + priv->hsize = cnt; + + for (i = 0; i < cnt; i++) + INIT_HLIST_HEAD(&priv->hash[i]); + + err = -ENOMEM; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + err = nft_hash_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + h = nft_hash_data(&new->key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, &new->key, priv->klen)) + continue; + nft_hash_elem_destroy(expr, new); + err = -EEXIST; + goto err1; + } + hlist_add_head(&new->hnode, &priv->hash[h]); + } + return 0; + +err1: + nft_hash_destroy(ctx, expr); + return err; +} + +static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + struct nlattr *list; + unsigned int i; + + if (priv->flags) + if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->flags & NFT_HASH_MAP) + if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry(elem, &priv->hash[i], hnode) { + if (nft_hash_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_hash_ops __read_mostly = { + .name = "hash", + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .owner = THIS_MODULE, + .eval = nft_hash_eval, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .dump = nft_hash_dump, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, +}; + +static int __init 
nft_hash_module_init(void) +{ + return nft_register_expr(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_expr(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c new file mode 100644 index 000000000000..3bf42c3cc49a --- /dev/null +++ b/net/netfilter/nft_immediate.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + +static void nft_immediate_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + nft_data_copy(&data[priv->dreg], &priv->data); +} + +static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { + [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_immediate_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_IMMEDIATE_DREG] == NULL || + tb[NFTA_IMMEDIATE_DATA] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_data_init(ctx, &priv->data, &desc, 
tb[NFTA_IMMEDIATE_DATA]); + if (err < 0) + return err; + priv->dlen = desc.len; + + err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + if (err < 0) + goto err1; + + return 0; + +err1: + nft_data_uninit(&priv->data, desc.type); + return err; +} + +static void nft_immediate_destroy(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + + return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, + nft_dreg_to_type(priv->dreg), priv->dlen); + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_imm_ops __read_mostly = { + .name = "immediate", + .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), + .owner = THIS_MODULE, + .eval = nft_immediate_eval, + .init = nft_immediate_init, + .destroy = nft_immediate_destroy, + .dump = nft_immediate_dump, + .policy = nft_immediate_policy, + .maxattr = NFTA_IMMEDIATE_MAX, +}; + +int __init nft_immediate_module_init(void) +{ + return nft_register_expr(&nft_imm_ops); +} + +void nft_immediate_module_exit(void) +{ + nft_unregister_expr(&nft_imm_ops); +} diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c new file mode 100644 index 000000000000..e0e3fc8aebc3 --- /dev/null +++ b/net/netfilter/nft_limit.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(limit_lock); + +struct nft_limit { + u64 tokens; + u64 rate; + u64 unit; + unsigned long stamp; +}; + +static void nft_limit_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + spin_lock_bh(&limit_lock); + if (time_after_eq(jiffies, priv->stamp)) { + priv->tokens = priv->rate; + priv->stamp = jiffies + priv->unit * HZ; + } + + if (priv->tokens >= 1) { + priv->tokens--; + spin_unlock_bh(&limit_lock); + return; + } + spin_unlock_bh(&limit_lock); + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, +}; + +static int nft_limit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + if (tb[NFTA_LIMIT_RATE] == NULL || + tb[NFTA_LIMIT_UNIT] == NULL) + return -EINVAL; + + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + priv->stamp = jiffies + priv->unit * HZ; + priv->tokens = priv->rate; + return 0; +} + +static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_limit_ops __read_mostly = { + .name = "limit", + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), + .owner = THIS_MODULE, + .eval = nft_limit_eval, + .init 
= nft_limit_init, + .dump = nft_limit_dump, + .policy = nft_limit_policy, + .maxattr = NFTA_LIMIT_MAX, +}; + +static int __init nft_limit_module_init(void) +{ + return nft_register_expr(&nft_limit_ops); +} + +static void __exit nft_limit_module_exit(void) +{ + nft_unregister_expr(&nft_limit_ops); +} + +module_init(nft_limit_module_init); +module_exit(nft_limit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("limit"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c new file mode 100644 index 000000000000..da495c3b1e7e --- /dev/null +++ b/net/netfilter/nft_log.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *nft_log_null_prefix = ""; + +struct nft_log { + struct nf_loginfo loginfo; + char *prefix; + int family; +}; + +static void nft_log_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_log *priv = nft_expr_priv(expr); + struct net *net = dev_net(pkt->in ? 
pkt->in : pkt->out); + + nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &priv->loginfo, "%s", priv->prefix); +} + +static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { + [NFTA_LOG_GROUP] = { .type = NLA_U16 }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, + [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, +}; + +static int nft_log_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; + const struct nlattr *nla; + + priv->family = ctx->afi->family; + + nla = tb[NFTA_LOG_PREFIX]; + if (nla != NULL) { + priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); + if (priv->prefix == NULL) + return -ENOMEM; + nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + } else + priv->prefix = (char *)nft_log_null_prefix; + + li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_GROUP] != NULL) + li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + + if (tb[NFTA_LOG_SNAPLEN] != NULL) + li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + + return 0; +} + +static void nft_log_destroy(const struct nft_expr *expr) +{ + struct nft_log *priv = nft_expr_priv(expr); + + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); +} + +static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_log *priv = nft_expr_priv(expr); + const struct nf_loginfo *li = &priv->loginfo; + + if (priv->prefix != nft_log_null_prefix) + if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) + goto nla_put_failure; + if (li->u.ulog.group) + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) + goto nla_put_failure; + if (li->u.ulog.copy_len) + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + 
htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + if (li->u.ulog.qthreshold) + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_log_ops __read_mostly = { + .name = "log", + .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), + .owner = THIS_MODULE, + .eval = nft_log_eval, + .init = nft_log_init, + .destroy = nft_log_destroy, + .dump = nft_log_dump, + .policy = nft_log_policy, + .maxattr = NFTA_LOG_MAX, +}; + +static int __init nft_log_module_init(void) +{ + return nft_register_expr(&nft_log_ops); +} + +static void __exit nft_log_module_exit(void) +{ + nft_unregister_expr(&nft_log_ops); +} + +module_init(nft_log_module_init); +module_exit(nft_log_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("log"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c new file mode 100644 index 000000000000..96735aa2f039 --- /dev/null +++ b/net/netfilter/nft_meta.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for TCP_TIME_WAIT */ +#include + +struct nft_meta { + enum nft_meta_keys key:8; + enum nft_registers dreg:8; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + + switch (priv->key) { + case NFT_META_LEN: + dest->data[0] = skb->len; + break; + case NFT_META_PROTOCOL: + *(__be16 *)dest->data = skb->protocol; + break; + case NFT_META_PRIORITY: + dest->data[0] = skb->priority; + break; + case NFT_META_MARK: + dest->data[0] = skb->mark; + break; + case NFT_META_IIF: + if (in == NULL) + goto err; + dest->data[0] = in->ifindex; + break; + case NFT_META_OIF: + if (out == NULL) + goto err; + dest->data[0] = out->ifindex; + break; + case NFT_META_IIFNAME: + if (in == NULL) + goto err; + strncpy((char *)dest->data, in->name, sizeof(dest->data)); + break; + case NFT_META_OIFNAME: + if (out == NULL) + goto err; + strncpy((char *)dest->data, out->name, sizeof(dest->data)); + break; + case NFT_META_IIFTYPE: + if (in == NULL) + goto err; + *(u16 *)dest->data = in->type; + break; + case NFT_META_OIFTYPE: + if (out == NULL) + goto err; + *(u16 *)dest->data = out->type; + break; + case NFT_META_SKUID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + + dest->data[0] = + from_kuid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; + case NFT_META_SKGID: + if 
(skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + dest->data[0] = + from_kgid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: { + const struct dst_entry *dst = skb_dst(skb); + + if (dst == NULL) + goto err; + dest->data[0] = dst->tclassid; + break; + } +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + dest->data[0] = skb->secmark; + break; +#endif + default: + WARN_ON(1); + goto err; + } + return; + +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_META_DREG] == NULL || + tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_LEN: + case NFT_META_PROTOCOL: + case NFT_META_PRIORITY: + case NFT_META_MARK: + case NFT_META_IIF: + case NFT_META_OIF: + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + case NFT_META_SKUID: + case NFT_META_SKGID: +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int 
nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_meta_ops __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_module_init(void) +{ + return nft_register_expr(&nft_meta_ops); +} + +static void __exit nft_meta_module_exit(void) +{ + nft_unregister_expr(&nft_meta_ops); +} + +module_init(nft_meta_module_init); +module_exit(nft_meta_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c new file mode 100644 index 000000000000..71177df75ffb --- /dev/null +++ b/net/netfilter/nft_meta_target.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_meta { + enum nft_meta_keys key; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data *nfres, + struct nft_data *data, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 val = data->data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = val; + break; + case NFT_META_PRIORITY: + skb->priority = val; + break; + case NFT_META_NFTRACE: + skb->nf_trace = val; + break; +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + skb->secmark = val; + break; +#endif + default: + WARN_ON(1); + } +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + if (tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (meta->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops meta_target __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_target_init(void) +{ + return nft_register_expr(&meta_target); +} + +static void 
__exit nft_meta_target_exit(void) +{ + nft_unregister_expr(&meta_target); +} + +module_init(nft_meta_target_init); +module_exit(nft_meta_target_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c new file mode 100644 index 000000000000..329f134b3f89 --- /dev/null +++ b/net/netfilter/nft_payload.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_payload_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + int offset; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = skb_transport_offset(skb); + break; + default: + BUG(); + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { 
.type = NLA_U32 }, +}; + +static int nft_payload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return -EINVAL; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return -EOPNOTSUPP; + } + + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_payload_ops __read_mostly = { + .name = "payload", + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .owner = THIS_MODULE, + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, + .policy = nft_payload_policy, + .maxattr = NFTA_PAYLOAD_MAX, +}; + +int __init nft_payload_module_init(void) +{ + return nft_register_expr(&nft_payload_ops); +} + +void nft_payload_module_exit(void) +{ + nft_unregister_expr(&nft_payload_ops); +} diff 
--git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c new file mode 100644 index 000000000000..7b7c8354c327 --- /dev/null +++ b/net/netfilter/nft_set.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_set { + struct rb_root root; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_set_elem { + struct rb_node node; + enum nft_set_elem_flags flags; + struct nft_data key; + struct nft_data data[]; +}; + +static void nft_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_set *priv = nft_expr_priv(expr); + const struct rb_node *parent = priv->root.rb_node; + const struct nft_set_elem *elem, *interval = NULL; + const struct nft_data *key = &data[priv->sreg]; + int d; + + while (parent != NULL) { + elem = rb_entry(parent, struct nft_set_elem, node); + + d = nft_data_cmp(&elem->key, key, priv->klen); + if (d < 0) { + parent = parent->rb_left; + interval = elem; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (elem->flags & NFT_SE_INTERVAL_END) + goto out; + if (priv->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + } + + if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { + elem = interval; + goto found; + } +out: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_set_elem_destroy(const struct nft_expr *expr, + struct nft_set_elem *elem) +{ + const struct nft_set *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & 
NFT_SET_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { + [NFTA_SE_KEY] = { .type = NLA_NESTED }, + [NFTA_SE_DATA] = { .type = NLA_NESTED }, + [NFTA_SE_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_set_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_set_elem **new) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_SE_MAX + 1]; + struct nft_set_elem *elem; + struct nft_data_desc d1, d2; + enum nft_set_elem_flags flags = 0; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); + if (err < 0) + return err; + + if (tb[NFTA_SE_KEY] == NULL) + return -EINVAL; + + if (tb[NFTA_SE_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); + if (flags & ~NFT_SE_INTERVAL_END) + return -EINVAL; + } + + size = sizeof(*elem); + if (priv->flags & NFT_SET_MAP) { + if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_SE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + elem->flags = flags; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_SE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); + if (err < 0) + goto err2; + err = -EINVAL; + if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_set_elem_dump(struct sk_buff 
*skb, const struct nft_expr *expr, + const struct nft_set_elem *elem) + +{ + const struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { + if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, + nft_dreg_to_type(priv->dreg), priv->dlen) < 0) + goto nla_put_failure; + } + + if (elem->flags){ + if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_set_destroy(const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + elem = rb_entry(node, struct nft_set_elem, node); + nft_set_elem_destroy(expr, elem); + } +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_SREG] = { .type = NLA_U32 }, + [NFTA_SET_DREG] = { .type = NLA_U32 }, + [NFTA_SET_KLEN] = { .type = NLA_U32 }, + [NFTA_SET_DLEN] = { .type = NLA_U32 }, + [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem, *uninitialized_var(new); + struct rb_node *parent, **p; + const struct nlattr *nla; + int err, rem, d; + + if (tb[NFTA_SET_SREG] == NULL || + tb[NFTA_SET_KLEN] == NULL || + tb[NFTA_SET_ELEMENTS] == NULL) + return -EINVAL; + + priv->root = RB_ROOT; + + if (tb[NFTA_SET_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); + if (priv->flags & ~(NFT_SET_INTERVAL | 
NFT_SET_MAP)) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_SET_DREG] != NULL) { + if (!(priv->flags & NFT_SET_MAP)) + return -EINVAL; + if (tb[NFTA_SET_DLEN] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) + priv->dlen = FIELD_SIZEOF(struct nft_data, data); + else { + priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); + if (priv->dlen == 0 || + priv->dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } + } else { + if (priv->flags & NFT_SET_MAP) + return -EINVAL; + if (tb[NFTA_SET_DLEN] != NULL) + return -EINVAL; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); + if (priv->klen == 0 || + priv->klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { + err = -EINVAL; + if (nla_type(nla) != NFTA_LIST_ELEM) + goto err1; + + err = nft_set_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + elem = rb_entry(parent, struct nft_set_elem, node); + d = nft_data_cmp(&elem->key, &new->key, priv->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else { + err = -EEXIST; + goto err2; + } + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + } + + return 0; + +err2: + nft_set_elem_destroy(expr, new); +err1: + nft_set_destroy(expr); + return err; +} + +static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + const struct nft_set_elem *elem; + struct rb_node *node; + struct nlattr *list; + + if (priv->flags) { + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) + goto 
nla_put_failure; + } + + if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) + goto nla_put_failure; + } + + list = nla_nest_start(skb, NFTA_SET_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (node = rb_first(&priv->root); node; node = rb_next(node)) { + elem = rb_entry(node, struct nft_set_elem, node); + if (nft_set_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_set_ops __read_mostly = { + .name = "set", + .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), + .owner = THIS_MODULE, + .eval = nft_set_eval, + .init = nft_set_init, + .destroy = nft_set_destroy, + .dump = nft_set_dump, + .policy = nft_set_policy, + .maxattr = NFTA_SET_MAX, +}; + +static int __init nft_set_module_init(void) +{ + return nft_register_expr(&nft_set_ops); +} + +static void __exit nft_set_module_exit(void) +{ + nft_unregister_expr(&nft_set_ops); +} + +module_init(nft_set_module_init); +module_exit(nft_set_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("set"); From 20a69341f2d00cd042e81c82289fba8a13c05a25 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 11 Oct 2013 12:06:22 +0200 Subject: [PATCH 04/17] netfilter: nf_tables: add netlink set API This patch adds the new netlink API for maintaining nf_tables sets independently of the ruleset. 
The API supports the following operations: - creation of sets - deletion of sets - querying of specific sets - dumping of all sets - addition of set elements - removal of set elements - dumping of all set elements Sets are identified by name; each table defines an individual namespace. The name of a set may be allocated automatically; this is mostly useful in combination with the NFT_SET_ANONYMOUS flag, which destroys a set automatically once the last reference has been released. Sets can be marked constant, meaning they're not allowed to change while linked to a rule. This allows lockless operation for set types that would otherwise require locking. Additionally, if the implementation supports it, sets can (as before) be used as maps, associating a data value with each key (or range), by specifying the NFT_SET_MAP flag and can be used for interval queries by specifying the NFT_SET_INTERVAL flag. Set elements are added and removed incrementally. All element operations support batching, reducing netlink message and set lookup overhead. The old "set" and "hash" expressions are replaced by a generic "lookup" expression, which binds to the specified set. Userspace is not aware of the actual set implementation used by the kernel anymore; all configuration options are generic. Currently the implementation selection logic is largely missing and the kernel will simply use the first registered implementation supporting the requested operation. Eventually, the plan is to have userspace supply a description of the data characteristics and select the implementation based on expected performance and memory use. This patch includes the new 'lookup' expression to look up matching elements in the set. This patch includes kernel-doc descriptions for this set API and it also includes the following fixes. 
From Patrick McHardy: * netfilter: nf_tables: fix set element data type in dumps * netfilter: nf_tables: fix indentation of struct nft_set_elem comments * netfilter: nf_tables: fix oops in nft_validate_data_load() * netfilter: nf_tables: fix oops while listing sets of built-in tables * netfilter: nf_tables: destroy anonymous sets immediately if binding fails * netfilter: nf_tables: propagate context to set iter callback * netfilter: nf_tables: add loop detection From Pablo Neira Ayuso: * netfilter: nf_tables: allow to dump all existing sets * netfilter: nf_tables: fix wrong type for flags variable in newelem Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 149 ++- include/uapi/linux/netfilter/nf_tables.h | 189 +++- net/netfilter/Kconfig | 6 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 1078 +++++++++++++++++++++- net/netfilter/nf_tables_core.c | 2 - net/netfilter/nft_hash.c | 333 +++---- net/netfilter/nft_immediate.c | 11 + net/netfilter/nft_lookup.c | 135 +++ net/netfilter/nft_rbtree.c | 247 +++++ net/netfilter/nft_set.c | 381 -------- 11 files changed, 1855 insertions(+), 678 deletions(-) create mode 100644 net/netfilter/nft_lookup.c create mode 100644 net/netfilter/nft_rbtree.c delete mode 100644 net/netfilter/nft_set.c diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d26dfa345f49..677dd79380ed 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,6 +6,8 @@ #include #include +#define NFT_JUMP_STACK_SIZE 16 + struct nft_pktinfo { struct sk_buff *skb; const struct net_device *in; @@ -48,23 +50,22 @@ static inline void nft_data_debug(const struct nft_data *data) } /** - * struct nft_ctx - nf_tables rule context + * struct nft_ctx - nf_tables rule/set context * + * @skb: netlink skb + * @nlh: netlink message header * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule 
is contained in */ struct nft_ctx { + const struct sk_buff *skb; + const struct nlmsghdr *nlh; const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; }; -enum nft_data_types { - NFT_DATA_VALUE, - NFT_DATA_VERDICT, -}; - struct nft_data_desc { enum nft_data_types type; unsigned int len; @@ -83,6 +84,11 @@ static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg) return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; } +static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) +{ + return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1; +} + extern int nft_validate_input_register(enum nft_registers reg); extern int nft_validate_output_register(enum nft_registers reg); extern int nft_validate_data_load(const struct nft_ctx *ctx, @@ -90,6 +96,132 @@ extern int nft_validate_data_load(const struct nft_ctx *ctx, const struct nft_data *data, enum nft_data_types type); +/** + * struct nft_set_elem - generic representation of set elements + * + * @cookie: implementation specific element cookie + * @key: element key + * @data: element data (maps only) + * @flags: element flags (end of interval) + * + * The cookie can be used to store a handle to the element for subsequent + * removal. 
+ */ +struct nft_set_elem { + void *cookie; + struct nft_data key; + struct nft_data data; + u32 flags; +}; + +struct nft_set; +struct nft_set_iter { + unsigned int count; + unsigned int skip; + int err; + int (*fn)(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem); +}; + +/** + * struct nft_set_ops - nf_tables set operations + * + * @lookup: look up an element within the set + * @insert: insert new element into set + * @remove: remove element from set + * @walk: iterate over all set elements + * @privsize: function to return size of set private data + * @init: initialize private data of new set instance + * @destroy: destroy private data of set instance + * @list: nf_tables_set_ops list node + * @owner: module reference + * @features: features supported by the implementation + */ +struct nft_set_ops { + bool (*lookup)(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data); + int (*get)(const struct nft_set *set, + struct nft_set_elem *elem); + int (*insert)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*remove)(const struct nft_set *set, + const struct nft_set_elem *elem); + void (*walk)(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter); + + unsigned int (*privsize)(const struct nlattr * const nla[]); + int (*init)(const struct nft_set *set, + const struct nlattr * const nla[]); + void (*destroy)(const struct nft_set *set); + + struct list_head list; + struct module *owner; + u32 features; +}; + +extern int nft_register_set(struct nft_set_ops *ops); +extern void nft_unregister_set(struct nft_set_ops *ops); + +/** + * struct nft_set - nf_tables set instance + * + * @list: table set list node + * @bindings: list of set bindings + * @name: name of the set + * @ktype: key type (numeric type defined by userspace, not used in the kernel) + * @dtype: data type (verdict or numeric type defined by userspace) + 
* @ops: set ops + * @flags: set flags + * @klen: key length + * @dlen: data length + * @data: private set data + */ +struct nft_set { + struct list_head list; + struct list_head bindings; + char name[IFNAMSIZ]; + u32 ktype; + u32 dtype; + /* runtime data below here */ + const struct nft_set_ops *ops ____cacheline_aligned; + u16 flags; + u8 klen; + u8 dlen; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_set_priv(const struct nft_set *set) +{ + return (void *)set->data; +} + +extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla); + +/** + * struct nft_set_binding - nf_tables set binding + * + * @list: set bindings list node + * @chain: chain containing the rule bound to the set + * + * A set binding contains all information necessary for validation + * of new elements added to a bound set. + */ +struct nft_set_binding { + struct list_head list; + const struct nft_chain *chain; +}; + +extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); +extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding); + /** * struct nft_expr_ops - nf_tables expression operations * @@ -115,7 +247,7 @@ struct nft_expr_ops { void (*destroy)(const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); - + const struct nft_data * (*get_verdict)(const struct nft_expr *expr); struct list_head list; const char *name; struct module *owner; @@ -298,4 +430,7 @@ extern void nft_unregister_expr(struct nft_expr_ops *); #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) +#define MODULE_ALIAS_NFT_SET() \ + MODULE_ALIAS("nft-set") + #endif /* _NET_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ec6d84a8ed1e..9e924014efe3 100644 --- 
a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -44,6 +44,12 @@ enum nft_verdicts { * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -55,9 +61,20 @@ enum nf_tables_msg_types { NFT_MSG_NEWRULE, NFT_MSG_GETRULE, NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, NFT_MSG_MAX, }; +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ enum nft_list_attributes { NFTA_LIST_UNPEC, NFTA_LIST_ELEM, @@ -127,6 +144,113 @@ enum nft_rule_attributes { }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, +}; + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only 
(NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + */ +enum nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly and is not + * explicitly specified by userspace. 
The only difference are sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ enum nft_data_attributes { NFTA_DATA_UNSPEC, NFTA_DATA_VALUE, @@ -275,58 +399,21 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) -enum nft_set_elem_flags { - NFT_SE_INTERVAL_END = 0x1, +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + __NFTA_LOOKUP_MAX }; - -enum nft_set_elem_attributes { - NFTA_SE_UNSPEC, - NFTA_SE_KEY, - NFTA_SE_DATA, - NFTA_SE_FLAGS, - __NFTA_SE_MAX -}; -#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) - -enum nft_set_flags { - NFT_SET_INTERVAL = 0x1, - NFT_SET_MAP = 0x2, -}; - -enum nft_set_attributes { - NFTA_SET_UNSPEC, - NFTA_SET_FLAGS, - NFTA_SET_SREG, - NFTA_SET_DREG, - NFTA_SET_KLEN, - NFTA_SET_DLEN, - NFTA_SET_ELEMENTS, - __NFTA_SET_MAX -}; -#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) - -enum nft_hash_flags { - NFT_HASH_MAP = 0x1, -}; - -enum nft_hash_elem_attributes { - NFTA_HE_UNSPEC, - NFTA_HE_KEY, - NFTA_HE_DATA, - __NFTA_HE_MAX -}; -#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) - -enum nft_hash_attributes { - 
NFTA_HASH_UNSPEC, - NFTA_HASH_FLAGS, - NFTA_HASH_SREG, - NFTA_HASH_DREG, - NFTA_HASH_KLEN, - NFTA_HASH_ELEMENTS, - __NFTA_HASH_MAX -}; -#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) /** * enum nft_payload_bases - nf_tables payload expression offset bases diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c271e1af93b5..aa184a46bbf3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -430,13 +430,13 @@ config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" -config NFT_SET +config NFT_RBTREE depends on NF_TABLES - tristate "Netfilter nf_tables set module" + tristate "Netfilter nf_tables rbtree set module" config NFT_HASH depends on NF_TABLES - tristate "Netfilter nf_tables hash module" + tristate "Netfilter nf_tables hash set module" config NFT_COUNTER depends on NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ca3f3932826..b6b78754e4cc 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,7 +75,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o #nf_tables-objs += nft_meta_target.o -obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d59c89c6c75..5092c817c222 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2008 Patrick McHardy + * Copyright (c) 2007-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,6 +315,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, nla_strlcpy(table->name, name, nla_len(name)); 
INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); @@ -409,6 +410,7 @@ again: } table->flags |= NFT_TABLE_BUILTIN; + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); list_for_each_entry(chain, &table->chains, list) @@ -820,10 +822,14 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, } static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, const struct nft_chain *chain) { + ctx->skb = skb; + ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; @@ -1301,7 +1307,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, afi, table, chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1392,6 +1398,939 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return 0; } +/* + * Sets + */ + +static LIST_HEAD(nf_tables_set_ops); + +int nft_register_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_set_ops); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_set); + +void nft_unregister_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_set); + +static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +{ + const struct nft_set_ops *ops; + u32 features; + +#ifdef CONFIG_MODULES + if (list_empty(&nf_tables_set_ops)) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-set"); 
+ nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (!list_empty(&nf_tables_set_ops)) + return ERR_PTR(-EAGAIN); + } +#endif + features = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + features &= NFT_SET_INTERVAL | NFT_SET_MAP; + } + + // FIXME: implement selection properly + list_for_each_entry(ops, &nf_tables_set_ops, list) { + if ((ops->features & features) != features) + continue; + if (!try_module_get(ops->owner)) + continue; + return ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_NAME] = { .type = NLA_STRING }, + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, + [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, +}; + +static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table = NULL; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + if (nla[NFTA_SET_TABLE] != NULL) { + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + } + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (!nla_strcmp(nla, set->name)) + return set; + } + return ERR_PTR(-ENOENT); +} + +static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, + const char *name) +{ + const struct nft_set *i; + const char *p; + unsigned long 
*inuse; + unsigned int n = 0; + + p = strnchr(name, IFNAMSIZ, '%'); + if (p != NULL) { + if (p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; + + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (inuse == NULL) + return -ENOMEM; + + list_for_each_entry(i, &ctx->table->sets, list) { + if (!sscanf(i->name, name, &n)) + continue; + if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + continue; + set_bit(n, inuse); + } + + n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + free_page((unsigned long)inuse); + } + + snprintf(set->name, sizeof(set->name), name, n); + list_for_each_entry(i, &ctx->table->sets, list) { + if (!strcmp(set->name, i->name)) + return -ENFILE; + } + return 0; +} + +static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + u32 portid = NETLINK_CB(ctx->skb).portid; + u32 seq = ctx->nlh->nlmsg_seq; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + if (set->flags != 0) + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen))) + goto nla_put_failure; + if (set->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) + goto nla_put_failure; + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; 
+} + +static int nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + int event) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(ctx->skb).portid; + struct net *net = sock_net(ctx->skb->sk); + bool report; + int err; + + report = nlmsg_report(ctx->nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + goto done; + } +cont: + idx++; + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(table, &ctx->afi->tables, list) { + if (cur_table && cur_table != table) + continue; + + ctx->table = table; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; + } +cont: + idx++; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + 
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nlattr *nla[NFTA_SET_MAX + 1]; + struct nft_ctx ctx; + int err, ret; + + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, + nft_set_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + if (ctx.table == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_table(&ctx, skb, cb); + + return ret; +} + +static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + struct sk_buff *skb2; + int err; + + /* Verify existance before starting dump */ + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_sets, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_set_ops *ops; + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_set *set; + struct nft_ctx ctx; + char name[IFNAMSIZ]; + unsigned int size; + bool create; + u32 ktype, klen, dlen, dtype, flags; + int err; + + if (nla[NFTA_SET_TABLE] == NULL || + 
nla[NFTA_SET_NAME] == NULL || + nla[NFTA_SET_KEY_LEN] == NULL) + return -EINVAL; + + ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { + ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + + klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + flags = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | + NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + dtype = 0; + dlen = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + + dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + dtype != NFT_DATA_VERDICT) + return -EINVAL; + + if (dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (dlen == 0 || + dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } else + dlen = sizeof(struct nft_data); + } else if (flags & NFT_SET_MAP) + return -EINVAL; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) { + if (PTR_ERR(set) != -ENOENT) + return PTR_ERR(set); + set = NULL; + } + + if (set != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + + ops = nft_select_set_ops(nla); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + size = 0; + if (ops->privsize != NULL) + size = ops->privsize(nla); + + err = -ENOMEM; + set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + if (set == NULL) + goto err1; + + nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + err = nf_tables_set_alloc_name(&ctx, set, name); + if (err < 0) + goto err2; + + INIT_LIST_HEAD(&set->bindings); + set->ops = ops; + set->ktype = ktype; + set->klen = klen; + set->dtype = dtype; + set->dlen = dlen; + set->flags = flags; + + err = ops->init(set, nla); + if (err < 0) + goto err2; + + list_add_tail(&set->list, &table->sets); + nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + return 0; + +err2: + kfree(set); +err1: + module_put(ops->owner); + return err; +} + +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + if (!(set->flags & NFT_SET_ANONYMOUS)) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + + set->ops->destroy(set); + module_put(set->ops->owner); + kfree(set); +} + +static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_set *set; + struct nft_ctx ctx; + int err; + + if (nla[NFTA_SET_TABLE] == NULL) + return -EINVAL; + + err = 
nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings)) + return -EBUSY; + + nf_tables_set_destroy(&ctx, set); + return 0; +} + +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + enum nft_registers dreg; + + dreg = nft_type_to_reg(set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); +} + +int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + struct nft_set_binding *i; + struct nft_set_iter iter; + + if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + return -EBUSY; + + if (set->flags & NFT_SET_MAP) { + /* If the set is already bound to the same chain all + * jumps are already validated for that chain. + */ + list_for_each_entry(i, &set->bindings, list) { + if (i->chain == binding->chain) + goto bind; + } + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_bind_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) { + /* Destroy anonymous sets if binding fails */ + if (set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); + + return iter.err; + } + } +bind: + binding->chain = ctx->chain; + list_add_tail(&binding->list, &set->bindings); + return 0; +} + +void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + list_del(&binding->list); + + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); +} + +/* + * Set elements + */ + +static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { + [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_FLAGS] = { .type = 
NLA_U32 }, +}; + +static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +static int nf_tables_fill_setelem(struct sk_buff *skb, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, + set->klen) < 0) + goto nla_put_failure; + + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + set->dtype == NFT_DATA_VERDICT ? 
NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen) < 0) + goto nla_put_failure; + + if (elem->flags != 0) + if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +struct nft_set_dump_args { + const struct netlink_callback *cb; + struct nft_set_iter iter; + struct sk_buff *skb; +}; + +static int nf_tables_dump_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_set_dump_args *args; + + args = container_of(iter, struct nft_set_dump_args, iter); + return nf_tables_fill_setelem(args->skb, set, elem); +} + +static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nft_set *set; + struct nft_set_dump_args args; + struct nft_ctx ctx; + struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + u32 portid, seq; + int event, err; + + nfmsg = nlmsg_data(cb->nlh); + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, + nft_set_elem_list_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + event = NFT_MSG_NEWSETELEM; + event |= NFNL_SUBSYS_NFTABLES << 8; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + NLM_F_MULTI); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, 
set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + args.cb = cb; + args.skb = skb; + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; + set->ops->walk(&ctx, set, &args.iter); + + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + + if (args.iter.err && args.iter.err != -EMSGSIZE) + return args.iter.err; + if (args.iter.count == cb->args[0]) + return 0; + + cb->args[0] = args.iter.count; + return skb->len; + +nla_put_failure: + return -ENOSPC; +} + +static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + int err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + return -EOPNOTSUPP; +} + +static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc d1, d2; + struct nft_set_elem elem; + struct nft_set_binding *binding; + enum nft_registers dreg; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + return err; + + if (nla[NFTA_SET_ELEM_KEY] == NULL) + return -EINVAL; + + elem.flags = 0; + if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { + elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + } + + if (set->flags & NFT_SET_MAP) { + if (nla[NFTA_SET_ELEM_DATA] == NULL && + !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + 
} else { + if (nla[NFTA_SET_ELEM_DATA] != NULL) + return -EINVAL; + } + + err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) + goto err2; + + err = -EEXIST; + if (set->ops->get(set, &elem) == 0) + goto err2; + + if (nla[NFTA_SET_ELEM_DATA] != NULL) { + err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + if (err < 0) + goto err2; + + err = -EINVAL; + if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) + goto err3; + + dreg = nft_type_to_reg(set->dtype); + list_for_each_entry(binding, &set->bindings, list) { + struct nft_ctx bind_ctx = { + .afi = ctx->afi, + .table = ctx->table, + .chain = binding->chain, + }; + + err = nft_validate_data_load(&bind_ctx, dreg, + &elem.data, d2.type); + if (err < 0) + goto err3; + } + } + + err = set->ops->insert(set, &elem); + if (err < 0) + goto err3; + + return 0; + +err3: + if (nla[NFTA_SET_ELEM_DATA] != NULL) + nft_data_uninit(&elem.data, d2.type); +err2: + nft_data_uninit(&elem.key, d1.type); +err1: + return err; +} + +static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_add_set_elem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + +static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc desc; + struct nft_set_elem 
elem; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + goto err1; + + err = -EINVAL; + if (nla[NFTA_SET_ELEM_KEY] == NULL) + goto err1; + + err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + goto err2; + + err = set->ops->get(set, &elem); + if (err < 0) + goto err2; + + set->ops->remove(set, &elem); + + nft_data_uninit(&elem.key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(&elem.data, set->dtype); + +err2: + nft_data_uninit(&elem.key, desc.type); +err1: + return err; +} + +static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_del_setelem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call = nf_tables_newtable, @@ -1438,6 +2377,36 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, + [NFT_MSG_NEWSET] = { + .call = nf_tables_newset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_GETSET] = { + .call = nf_tables_getset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_DELSET] = { + .call = nf_tables_delset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_NEWSETELEM] = { 
+ .call = nf_tables_newsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_GETSETELEM] = { + .call = nf_tables_getsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_DELSETELEM] = { + .call = nf_tables_delsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, }; static const struct nfnetlink_subsystem nf_tables_subsys = { @@ -1447,6 +2416,90 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, }; +/* + * Loop detection - walk through the ruleset beginning at the destination chain + * of a new jump until either the source chain is reached (loop) or all + * reachable chains have been traversed. + * + * The loop check is performed whenever a new jump verdict is added to an + * expression or verdict map or a verdict map is bound to a new chain. + */ + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain); + +static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + switch (elem->data.verdict) { + case NFT_JUMP: + case NFT_GOTO: + return nf_tables_check_loops(ctx, elem->data.chain); + default: + return 0; + } +} + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + const struct nft_data *data; + const struct nft_set *set; + struct nft_set_binding *binding; + struct nft_set_iter iter; + int err; + + if (ctx->chain == chain) + return -ELOOP; + + list_for_each_entry(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + if (!expr->ops->get_verdict) + continue; + + data = expr->ops->get_verdict(expr); + if (data == NULL) + break; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + err = 
nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + default: + break; + } + } + } + + list_for_each_entry(set, &ctx->table->sets, list) { + if (!(set->flags & NFT_SET_MAP) || + set->dtype != NFT_DATA_VERDICT) + continue; + + list_for_each_entry(binding, &set->bindings, list) { + if (binding->chain != chain) + continue; + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_loop_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) + return iter.err; + } + } + + return 0; +} + /** * nft_validate_input_register - validate an expressions' input register * @@ -1500,11 +2553,25 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type) { + int err; + switch (reg) { case NFT_REG_VERDICT: if (data == NULL || type != NFT_DATA_VERDICT) return -EINVAL; - // FIXME: do loop detection + + if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + + if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) + return -EMLINK; + data->chain->level = ctx->chain->level + 1; + } + } + return 0; default: if (data != NULL && type != NFT_DATA_VALUE) @@ -1555,11 +2622,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (chain->flags & NFT_BASE_CHAIN) return -EOPNOTSUPP; - if (ctx->chain->level + 1 > chain->level) { - if (ctx->chain->level + 1 == 16) - return -EMLINK; - chain->level = ctx->chain->level + 1; - } chain->use++; data->chain = chain; desc->len = sizeof(data); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index bc7fb85d4002..fd0ecd3255c1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,8 +20,6 @@ #include #include -#define NFT_JUMP_STACK_SIZE 16 - unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff 
*skb, const struct net_device *in, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 67cc502881f1..3d3f8fce10a5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,11 +21,6 @@ struct nft_hash { struct hlist_head *hash; unsigned int hsize; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; }; struct nft_hash_elem { @@ -42,213 +37,140 @@ static unsigned int nft_hash_data(const struct nft_data *data, { unsigned int h; - // FIXME: can we reasonably guarantee the upper bits are fixed? - h = jhash2(data->data, len >> 2, nft_hash_rnd); + h = jhash(data->data, len, nft_hash_rnd); return ((u64)h * hsize) >> 32; } -static void nft_hash_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - const struct nft_data *key = &data[priv->sreg]; + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; unsigned int h; - h = nft_hash_data(key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, key, priv->klen)) + h = nft_hash_data(key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, key, set->klen)) continue; - if (priv->flags & NFT_HASH_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); + return true; } - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + return false; } -static void 
nft_hash_elem_destroy(const struct nft_expr *expr, - struct nft_hash_elem *elem) +static void nft_hash_elem_destroy(const struct nft_set *set, + struct nft_hash_elem *he) { - const struct nft_hash *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_HASH_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); + nft_data_uninit(&he->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(he->data, set->dtype); + kfree(he); } -static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { - [NFTA_HE_KEY] = { .type = NLA_NESTED }, - [NFTA_HE_DATA] = { .type = NLA_NESTED }, -}; - -static int nft_hash_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - struct nft_hash_elem **new) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_HE_MAX + 1]; - struct nft_hash_elem *elem; - struct nft_data_desc d1, d2; - unsigned int size; - int err; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int size, h; - err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); - if (err < 0) - return err; - - if (tb[NFTA_HE_KEY] == NULL) + if (elem->flags != 0) return -EINVAL; - size = sizeof(*elem); - if (priv->flags & NFT_HASH_MAP) { - if (tb[NFTA_HE_DATA] == NULL) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_HE_DATA] != NULL) - return -EINVAL; - } + size = sizeof(*he); + if (set->flags & NFT_SET_MAP) + size += sizeof(he->data[0]); - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) + he = kzalloc(size, GFP_KERNEL); + if (he == NULL) return -ENOMEM; - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; + nft_data_copy(&he->key, &elem->key); + if 
(set->flags & NFT_SET_MAP) + nft_data_copy(he->data, &elem->data); - if (tb[NFTA_HE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); - if (err < 0) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } - - *new = elem; + h = nft_hash_data(&he->key, priv->hsize, set->klen); + hlist_add_head_rcu(&he->hnode, &priv->hash[h]); return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; } -static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_hash_elem *elem) - +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *nest; + struct nft_hash_elem *he = elem->cookie; - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; - - if (priv->flags & NFT_HASH_MAP) { - if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, - NFT_DATA_VALUE, priv->dlen) < 0) - goto nla_put_failure; - } - - nla_nest_end(skb, nest); - return 0; - -nla_put_failure: - return -1; + hlist_del_rcu(&he->hnode); + kfree(he); } -static void nft_hash_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct hlist_node *next; - struct nft_hash_elem *elem; + const struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int h; + + h = nft_hash_data(&elem->key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, &elem->key, set->klen)) + continue; + + elem->cookie = he; + elem->flags = 0; + if (set->flags & NFT_SET_MAP) + 
nft_data_copy(&elem->data, he->data); + return 0; + } + return -ENOENT; +} + +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_set_elem elem; unsigned int i; for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { - hlist_del(&elem->hnode); - nft_hash_elem_destroy(expr, elem); + hlist_for_each_entry(he, &priv->hash[i], hnode) { + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; } } - kfree(priv->hash); } -static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { - [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, - [NFTA_HASH_SREG] = { .type = NLA_U32 }, - [NFTA_HASH_DREG] = { .type = NLA_U32 }, - [NFTA_HASH_KLEN] = { .type = NLA_U32 }, - [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, -}; +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} -static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, +static int nft_hash_init(const struct nft_set *set, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nft_hash_elem *elem, *uninitialized_var(new); - const struct nlattr *nla; + struct nft_hash *priv = nft_set_priv(set); unsigned int cnt, i; - unsigned int h; - int err, rem; if (unlikely(!nft_hash_rnd_initted)) { get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - if (tb[NFTA_HASH_SREG] == NULL || - tb[NFTA_HASH_KLEN] == NULL || - tb[NFTA_HASH_ELEMENTS] == NULL) - return -EINVAL; - - if (tb[NFTA_HASH_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); - if 
(priv->flags & ~NFT_HASH_MAP) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_HASH_DREG] != NULL) { - if (!(priv->flags & NFT_HASH_MAP)) - return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); - if (priv->klen == 0) - return -EINVAL; - - cnt = 0; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - if (nla_type(nla) != NFTA_LIST_ELEM) - return -EINVAL; - cnt++; - } - /* Aim for a load factor of 0.75 */ + // FIXME: temporarily broken until we have set descriptions + cnt = 100; cnt = cnt * 4 / 3; priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); @@ -259,85 +181,46 @@ static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, for (i = 0; i < cnt; i++) INIT_HLIST_HEAD(&priv->hash[i]); - err = -ENOMEM; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - err = nft_hash_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - h = nft_hash_data(&new->key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, &new->key, priv->klen)) - continue; - nft_hash_elem_destroy(expr, new); - err = -EEXIST; - goto err1; - } - hlist_add_head(&new->hnode, &priv->hash[h]); - } return 0; - -err1: - nft_hash_destroy(ctx, expr); - return err; } -static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void nft_hash_destroy(const struct nft_set *set) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - struct nlattr *list; + const struct nft_hash *priv = nft_set_priv(set); + const struct hlist_node *next; + struct nft_hash_elem *elem; unsigned int i; - if (priv->flags) - if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) - 
goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (priv->flags & NFT_HASH_MAP) - if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry(elem, &priv->hash[i], hnode) { - if (nft_hash_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(set, elem); } } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; + kfree(priv->hash); } -static struct nft_expr_ops nft_hash_ops __read_mostly = { - .name = "hash", - .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), - .owner = THIS_MODULE, - .eval = nft_hash_eval, +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, .init = nft_hash_init, .destroy = nft_hash_destroy, - .dump = nft_hash_dump, - .policy = nft_hash_policy, - .maxattr = NFTA_HASH_MAX, + .get = nft_hash_get, + .insert = nft_hash_insert, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .walk = nft_hash_walk, + .features = NFT_SET_MAP, + .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_expr(&nft_hash_ops); + return nft_register_set(&nft_hash_ops); } static void __exit nft_hash_module_exit(void) { - nft_unregister_expr(&nft_hash_ops); + nft_unregister_set(&nft_hash_ops); } module_init(nft_hash_module_init); @@ -345,4 +228,4 @@ module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("hash"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3bf42c3cc49a..78334bf37007 100644 --- 
a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,6 +90,16 @@ nla_put_failure: return -1; } +static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return &priv->data; + else + return NULL; +} + static struct nft_expr_ops nft_imm_ops __read_mostly = { .name = "immediate", .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -98,6 +108,7 @@ static struct nft_expr_ops nft_imm_ops __read_mostly = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, + .get_verdict = nft_immediate_get_verdict, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c new file mode 100644 index 000000000000..4962d2173678 --- /dev/null +++ b/net/netfilter/nft_lookup.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_lookup { + struct nft_set *set; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + struct nft_set_binding binding; +}; + +static void nft_lookup_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + + if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { + [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, +}; + +static int nft_lookup_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + struct nft_set *set; + int err; + + if (tb[NFTA_LOOKUP_SET] == NULL || + tb[NFTA_LOOKUP_SREG] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) { + if (set->dtype != NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->dtype == NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + 
+static void nft_lookup_destroy(const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(NULL, priv->set, &priv->binding); +} + +static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP) + if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_lookup_ops __read_mostly = { + .name = "lookup", + .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), + .owner = THIS_MODULE, + .eval = nft_lookup_eval, + .init = nft_lookup_init, + .destroy = nft_lookup_destroy, + .dump = nft_lookup_dump, + .policy = nft_lookup_policy, + .maxattr = NFTA_LOOKUP_MAX, +}; + +int __init nft_lookup_module_init(void) +{ + return nft_register_expr(&nft_lookup_ops); +} + +void nft_lookup_module_exit(void) +{ + nft_unregister_expr(&nft_lookup_ops); +} diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c new file mode 100644 index 000000000000..ca0c1b231bfe --- /dev/null +++ b/net/netfilter/nft_rbtree.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_rbtree { + struct rb_root root; +}; + +struct nft_rbtree_elem { + struct rb_node node; + u16 flags; + struct nft_data key; + struct nft_data data[]; +}; + +static bool nft_rbtree_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe, *interval = NULL; + const struct rb_node *parent = priv->root.rb_node; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, key, set->klen); + if (d < 0) { + parent = parent->rb_left; + interval = rbe; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + goto out; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, rbe->data); + return true; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL) { + rbe = interval; + goto found; + } +out: + return false; +} + +static void nft_rbtree_elem_destroy(const struct nft_set *set, + struct nft_rbtree_elem *rbe) +{ + nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); +} + +static int __nft_rbtree_insert(const struct nft_set *set, + struct nft_rbtree_elem *new) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *parent, **p; + int d; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_data_cmp(&rbe->key, &new->key, set->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else + return -EEXIST; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + 
return 0; +} + +static int nft_rbtree_insert(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe; + unsigned int size; + int err; + + size = sizeof(*rbe); + if (set->flags & NFT_SET_MAP) + size += sizeof(rbe->data[0]); + + rbe = kzalloc(size, GFP_KERNEL); + if (rbe == NULL) + return -ENOMEM; + + rbe->flags = elem->flags; + nft_data_copy(&rbe->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(rbe->data, &elem->data); + + err = __nft_rbtree_insert(set, rbe); + if (err < 0) + kfree(rbe); + return err; +} + +static void nft_rbtree_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe = elem->cookie; + + rb_erase(&rbe->node, &priv->root); + kfree(rbe); +} + +static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent = priv->root.rb_node; + struct nft_rbtree_elem *rbe; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else { + elem->cookie = rbe; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, rbe->data); + elem->flags = rbe->flags; + return 0; + } + } + return -ENOENT; +} + +static void nft_rbtree_walk(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe; + struct nft_set_elem elem; + struct rb_node *node; + + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + if (iter->count < iter->skip) + goto cont; + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_data_copy(&elem.key, &rbe->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem.data, rbe->data); + 
elem.flags = rbe->flags; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } +} + +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_rbtree); +} + +static int nft_rbtree_init(const struct nft_set *set, + const struct nlattr * const nla[]) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->root = RB_ROOT; + return 0; +} + +static void nft_rbtree_destroy(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_rbtree_elem_destroy(set, rbe); + } +} + +static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .privsize = nft_rbtree_privsize, + .init = nft_rbtree_init, + .destroy = nft_rbtree_destroy, + .insert = nft_rbtree_insert, + .remove = nft_rbtree_remove, + .get = nft_rbtree_get, + .lookup = nft_rbtree_lookup, + .walk = nft_rbtree_walk, + .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .owner = THIS_MODULE, +}; + +static int __init nft_rbtree_module_init(void) +{ + return nft_register_set(&nft_rbtree_ops); +} + +static void __exit nft_rbtree_module_exit(void) +{ + nft_unregister_set(&nft_rbtree_ops); +} + +module_init(nft_rbtree_module_init); +module_exit(nft_rbtree_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c deleted file mode 100644 index 7b7c8354c327..000000000000 --- a/net/netfilter/nft_set.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct nft_set { - struct rb_root root; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; -}; - -struct nft_set_elem { - struct rb_node node; - enum nft_set_elem_flags flags; - struct nft_data key; - struct nft_data data[]; -}; - -static void nft_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_set *priv = nft_expr_priv(expr); - const struct rb_node *parent = priv->root.rb_node; - const struct nft_set_elem *elem, *interval = NULL; - const struct nft_data *key = &data[priv->sreg]; - int d; - - while (parent != NULL) { - elem = rb_entry(parent, struct nft_set_elem, node); - - d = nft_data_cmp(&elem->key, key, priv->klen); - if (d < 0) { - parent = parent->rb_left; - interval = elem; - } else if (d > 0) - parent = parent->rb_right; - else { -found: - if (elem->flags & NFT_SE_INTERVAL_END) - goto out; - if (priv->flags & NFT_SET_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; - } - } - - if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { - elem = interval; - goto found; - } -out: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; -} - -static void nft_set_elem_destroy(const struct nft_expr *expr, - struct nft_set_elem *elem) -{ - const struct nft_set *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_SET_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); -} - -static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { - [NFTA_SE_KEY] = { .type = NLA_NESTED }, - [NFTA_SE_DATA] = { .type = NLA_NESTED }, - [NFTA_SE_FLAGS] = { .type = NLA_U32 }, -}; - -static int nft_set_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - 
struct nft_set_elem **new) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_SE_MAX + 1]; - struct nft_set_elem *elem; - struct nft_data_desc d1, d2; - enum nft_set_elem_flags flags = 0; - unsigned int size; - int err; - - err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); - if (err < 0) - return err; - - if (tb[NFTA_SE_KEY] == NULL) - return -EINVAL; - - if (tb[NFTA_SE_FLAGS] != NULL) { - flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); - if (flags & ~NFT_SE_INTERVAL_END) - return -EINVAL; - } - - size = sizeof(*elem); - if (priv->flags & NFT_SET_MAP) { - if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_SE_DATA] != NULL) - return -EINVAL; - } - - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) - return -ENOMEM; - elem->flags = flags; - - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; - - if (tb[NFTA_SE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); - if (err < 0) - goto err2; - err = -EINVAL; - if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } - - *new = elem; - return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; -} - -static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_set_elem *elem) - -{ - const struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *nest; - - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP && !(elem->flags & 
NFT_SE_INTERVAL_END)) { - if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, - nft_dreg_to_type(priv->dreg), priv->dlen) < 0) - goto nla_put_failure; - } - - if (elem->flags){ - if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) - goto nla_put_failure; - } - - nla_nest_end(skb, nest); - return 0; - -nla_put_failure: - return -1; -} - -static void nft_set_destroy(const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem; - struct rb_node *node; - - while ((node = priv->root.rb_node) != NULL) { - rb_erase(node, &priv->root); - elem = rb_entry(node, struct nft_set_elem, node); - nft_set_elem_destroy(expr, elem); - } -} - -static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { - [NFTA_SET_FLAGS] = { .type = NLA_U32 }, - [NFTA_SET_SREG] = { .type = NLA_U32 }, - [NFTA_SET_DREG] = { .type = NLA_U32 }, - [NFTA_SET_KLEN] = { .type = NLA_U32 }, - [NFTA_SET_DLEN] = { .type = NLA_U32 }, - [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, -}; - -static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem, *uninitialized_var(new); - struct rb_node *parent, **p; - const struct nlattr *nla; - int err, rem, d; - - if (tb[NFTA_SET_SREG] == NULL || - tb[NFTA_SET_KLEN] == NULL || - tb[NFTA_SET_ELEMENTS] == NULL) - return -EINVAL; - - priv->root = RB_ROOT; - - if (tb[NFTA_SET_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); - if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_SET_DREG] != NULL) { - if (!(priv->flags & NFT_SET_MAP)) - return -EINVAL; - if (tb[NFTA_SET_DLEN] == NULL) - return -EINVAL; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); - err = 
nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - if (priv->dreg == NFT_REG_VERDICT) - priv->dlen = FIELD_SIZEOF(struct nft_data, data); - else { - priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); - if (priv->dlen == 0 || - priv->dlen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - } - } else { - if (priv->flags & NFT_SET_MAP) - return -EINVAL; - if (tb[NFTA_SET_DLEN] != NULL) - return -EINVAL; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); - if (priv->klen == 0 || - priv->klen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - - nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { - err = -EINVAL; - if (nla_type(nla) != NFTA_LIST_ELEM) - goto err1; - - err = nft_set_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - parent = NULL; - p = &priv->root.rb_node; - while (*p != NULL) { - parent = *p; - elem = rb_entry(parent, struct nft_set_elem, node); - d = nft_data_cmp(&elem->key, &new->key, priv->klen); - if (d < 0) - p = &parent->rb_left; - else if (d > 0) - p = &parent->rb_right; - else { - err = -EEXIST; - goto err2; - } - } - rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, &priv->root); - } - - return 0; - -err2: - nft_set_elem_destroy(expr, new); -err1: - nft_set_destroy(expr); - return err; -} - -static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - const struct nft_set_elem *elem; - struct rb_node *node; - struct nlattr *list; - - if (priv->flags) { - if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) - goto nla_put_failure; - } - - if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP) { - if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) - goto 
nla_put_failure; - } - - list = nla_nest_start(skb, NFTA_SET_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - - for (node = rb_first(&priv->root); node; node = rb_next(node)) { - elem = rb_entry(node, struct nft_set_elem, node); - if (nft_set_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; - } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops nft_set_ops __read_mostly = { - .name = "set", - .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), - .owner = THIS_MODULE, - .eval = nft_set_eval, - .init = nft_set_init, - .destroy = nft_set_destroy, - .dump = nft_set_dump, - .policy = nft_set_policy, - .maxattr = NFTA_SET_MAX, -}; - -static int __init nft_set_module_init(void) -{ - return nft_register_expr(&nft_set_ops); -} - -static void __exit nft_set_module_exit(void) -{ - nft_unregister_expr(&nft_set_ops); -} - -module_init(nft_set_module_init); -module_exit(nft_set_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("set"); From ef1f7df9170dbd875ce198ba84e6ab80f6fc139e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:41:20 +0200 Subject: [PATCH 05/17] netfilter: nf_tables: expression ops overloading Split the expression ops into two parts and support overloading of the runtime expression ops based on the requested function through a ->select_ops() callback. This can be used to provide optimized implementations, for instance for loading small aligned amounts of data from the packet or inlining frequently used operations into the main evaluation loop. 
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 42 ++++++---- net/ipv4/netfilter/nf_table_nat_ipv4.c | 18 +++-- net/ipv4/netfilter/nft_reject_ipv4.c | 18 +++-- net/netfilter/nf_tables_api.c | 101 ++++++++++++++----------- net/netfilter/nft_bitwise.c | 18 +++-- net/netfilter/nft_byteorder.c | 18 +++-- net/netfilter/nft_cmp.c | 18 +++-- net/netfilter/nft_counter.c | 22 ++++-- net/netfilter/nft_ct.c | 18 +++-- net/netfilter/nft_expr_template.c | 20 +++-- net/netfilter/nft_exthdr.c | 16 ++-- net/netfilter/nft_immediate.c | 18 +++-- net/netfilter/nft_limit.c | 18 +++-- net/netfilter/nft_log.c | 18 +++-- net/netfilter/nft_lookup.c | 16 ++-- net/netfilter/nft_meta.c | 18 +++-- net/netfilter/nft_payload.c | 18 +++-- 17 files changed, 267 insertions(+), 148 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 677dd79380ed..66d0359702c6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -222,25 +222,45 @@ extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); + /** - * struct nft_expr_ops - nf_tables expression operations + * struct nft_expr_type - nf_tables expression type * - * @eval: Expression evaluation function - * @init: initialization function - * @destroy: destruction function - * @dump: function to dump parameters + * @select_ops: function to select nft_expr_ops + * @ops: default ops, used when no select_ops function is present * @list: used internally * @name: Identifier * @owner: module reference * @policy: netlink attribute policy * @maxattr: highest netlink attribute number + */ +struct nft_expr_type { + const struct nft_expr_ops *(*select_ops)(const struct nlattr * const tb[]); + const struct nft_expr_ops *ops; + struct list_head list; + const char *name; + struct module *owner; +
const struct nla_policy *policy; + unsigned int maxattr; +}; + +/** + * struct nft_expr_ops - nf_tables expression operations + * + * @eval: Expression evaluation function * @size: full expression size, including private data size + * @init: initialization function + * @destroy: destruction function + * @dump: function to dump parameters + * @type: expression type */ struct nft_expr; struct nft_expr_ops { void (*eval)(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt); + unsigned int size; + int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); @@ -248,14 +268,10 @@ struct nft_expr_ops { int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); const struct nft_data * (*get_verdict)(const struct nft_expr *expr); - struct list_head list; - const char *name; - struct module *owner; - const struct nla_policy *policy; - unsigned int maxattr; - unsigned int size; + const struct nft_expr_type *type; }; +#define NFT_EXPR_MAXATTR 16 #define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \ ALIGN(size, __alignof__(struct nft_expr))) @@ -418,8 +434,8 @@ extern void nft_unregister_afinfo(struct nft_af_info *); extern int nft_register_table(struct nft_table *, int family); extern void nft_unregister_table(struct nft_table *, int family); -extern int nft_register_expr(struct nft_expr_ops *); -extern void nft_unregister_expr(struct nft_expr_ops *); +extern int nft_register_expr(struct nft_expr_type *); +extern void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nf_table_nat_ipv4.c index 2a6f184c10bd..2ecce39077a3 100644 --- a/net/ipv4/netfilter/nf_table_nat_ipv4.c +++ b/net/ipv4/netfilter/nf_table_nat_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This 
program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -149,15 +149,21 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_nat_ops __read_mostly = { - .name = "nat", +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), - .owner = THIS_MODULE, .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, .policy = nft_nat_policy, .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, }; /* @@ -382,7 +388,7 @@ static int __init nf_table_nat_init(void) if (err < 0) goto err1; - err = nft_register_expr(&nft_nat_ops); + err = nft_register_expr(&nft_nat_type); if (err < 0) goto err2; @@ -396,7 +402,7 @@ err1: static void __exit nf_table_nat_exit(void) { - nft_unregister_expr(&nft_nat_ops); + nft_unregister_expr(&nft_nat_type); nft_unregister_table(&nf_table_nat_ipv4, AF_INET); } diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index b4ee8d3bb1e4..fff5ba1a33b7 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -88,25 +88,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops reject_ops __read_mostly = { - .name = "reject", +static struct nft_expr_type nft_reject_type; +static const struct nft_expr_ops nft_reject_ops = { + .type = &nft_reject_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), - .owner = THIS_MODULE, .eval = nft_reject_eval, .init = nft_reject_init, .dump = nft_reject_dump, +}; + +static struct nft_expr_type 
nft_reject_type __read_mostly = { + .name = "reject", + .ops = &nft_reject_ops, .policy = nft_reject_policy, .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, }; static int __init nft_reject_module_init(void) { - return nft_register_expr(&reject_ops); + return nft_register_expr(&nft_reject_type); } static void __exit nft_reject_module_exit(void) { - nft_unregister_expr(&reject_ops); + nft_unregister_expr(&nft_reject_type); } module_init(nft_reject_module_init); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5092c817c222..6dac9a3c9c40 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -840,64 +840,64 @@ static void nft_ctx_init(struct nft_ctx *ctx, */ /** - * nft_register_expr - register nf_tables expr operations - * @ops: expr operations + * nft_register_expr - register nf_tables expr type + * @type: expr type * - * Registers the expr operations for use with nf_tables. Returns zero on + * Registers the expr type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ -int nft_register_expr(struct nft_expr_ops *ops) +int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_expressions); + list_add_tail(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_expr); /** - * nft_unregister_expr - unregister nf_tables expr operations - * @ops: expr operations + * nft_unregister_expr - unregister nf_tables expr type + * @type: expr type * - * Unregisters the expr operations for use with nf_tables. + * Unregisters the expr type for use with nf_tables.
*/ -void nft_unregister_expr(struct nft_expr_ops *ops) +void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); -static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; - list_for_each_entry(ops, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, ops->name)) - return ops; + list_for_each_entry(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) + return type; } return NULL; } -static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); - ops = __nft_expr_ops_get(nla); - if (ops != NULL && try_module_get(ops->owner)) - return ops; + type = __nft_expr_type_get(nla); + if (type != NULL && try_module_get(type->owner)) + return type; #ifdef CONFIG_MODULES - if (ops == NULL) { + if (type == NULL) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (__nft_expr_ops_get(nla)) + if (__nft_expr_type_get(nla)) return ERR_PTR(-EAGAIN); } #endif @@ -912,7 +912,7 @@ static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { static int nf_tables_fill_expr_info(struct sk_buff *skb, const struct nft_expr *expr) { - if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) goto nla_put_failure; if (expr->ops->dump) { @@ -932,28 +932,52 @@ nla_put_failure: struct nft_expr_info { const struct nft_expr_ops *ops; - struct nlattr *tb[NFTA_EXPR_MAX + 1]; + struct nlattr 
*tb[NFT_EXPR_MAXATTR + 1]; }; static int nf_tables_expr_parse(const struct nlattr *nla, struct nft_expr_info *info) { + const struct nft_expr_type *type; const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); if (err < 0) return err; - ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); - if (IS_ERR(ops)) - return PTR_ERR(ops); + type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + + if (tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], type->policy); + if (err < 0) + goto err1; + } else + memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); + + if (type->select_ops != NULL) { + ops = type->select_ops((const struct nlattr * const *)info->tb); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto err1; + } + } else + ops = type->ops; + info->ops = ops; return 0; + +err1: + module_put(type->owner); + return err; } static int nf_tables_newexpr(const struct nft_ctx *ctx, - struct nft_expr_info *info, + const struct nft_expr_info *info, struct nft_expr *expr) { const struct nft_expr_ops *ops = info->ops; @@ -961,23 +985,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, expr->ops = ops; if (ops->init) { - struct nlattr *ma[ops->maxattr + 1]; - - if (info->tb[NFTA_EXPR_DATA]) { - err = nla_parse_nested(ma, ops->maxattr, - info->tb[NFTA_EXPR_DATA], - ops->policy); - if (err < 0) - goto err1; - } else - memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); - - err = ops->init(ctx, expr, (const struct nlattr **)ma); + err = ops->init(ctx, expr, (const struct nlattr **)info->tb); if (err < 0) goto err1; } - info->ops = NULL; return 0; err1: @@ -989,7 +1001,7 @@ static void nf_tables_expr_destroy(struct nft_expr *expr) { if (expr->ops->destroy) expr->ops->destroy(expr); - module_put(expr->ops->owner); + 
module_put(expr->ops->type->owner); } /* @@ -1313,6 +1325,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err = nf_tables_newexpr(&ctx, &info[i], expr); if (err < 0) goto err2; + info[i].ops = NULL; expr = nft_expr_next(expr); } @@ -1341,7 +1354,7 @@ err2: err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) - module_put(info[i].ops->owner); + module_put(info[i].ops->type->owner); } return err; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 0f7501506367..4fb6ee2c1106 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -118,23 +118,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_bitwise_ops __read_mostly = { - .name = "bitwise", +static struct nft_expr_type nft_bitwise_type; +static const struct nft_expr_ops nft_bitwise_ops = { + .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), - .owner = THIS_MODULE, .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, +}; + +static struct nft_expr_type nft_bitwise_type __read_mostly = { + .name = "bitwise", + .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, + .owner = THIS_MODULE, }; int __init nft_bitwise_module_init(void) { - return nft_register_expr(&nft_bitwise_ops); + return nft_register_expr(&nft_bitwise_type); } void nft_bitwise_module_exit(void) { - nft_unregister_expr(&nft_bitwise_ops); + nft_unregister_expr(&nft_bitwise_type); } diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 8b0657a4d17b..c39ed8d29df1 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 
2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -145,23 +145,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_byteorder_ops __read_mostly = { - .name = "byteorder", +static struct nft_expr_type nft_byteorder_type; +static const struct nft_expr_ops nft_byteorder_ops = { + .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), - .owner = THIS_MODULE, .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, +}; + +static struct nft_expr_type nft_byteorder_type __read_mostly = { + .name = "byteorder", + .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, + .owner = THIS_MODULE, }; int __init nft_byteorder_module_init(void) { - return nft_register_expr(&nft_byteorder_ops); + return nft_register_expr(&nft_byteorder_type); } void nft_byteorder_module_exit(void) { - nft_unregister_expr(&nft_byteorder_ops); + nft_unregister_expr(&nft_byteorder_type); } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e734d670120a..2c9d5fef2e63 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -124,23 +124,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_cmp_ops __read_mostly = { - .name = "cmp", +static struct nft_expr_type nft_cmp_type; +static const struct nft_expr_ops nft_cmp_ops = { + .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), - .owner = THIS_MODULE, .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, +}; + +static struct nft_expr_type nft_cmp_type __read_mostly = { + .name = "cmp", + .ops = &nft_cmp_ops, .policy = 
nft_cmp_policy, .maxattr = NFTA_CMP_MAX, + .owner = THIS_MODULE, }; int __init nft_cmp_module_init(void) { - return nft_register_expr(&nft_cmp_ops); + return nft_register_expr(&nft_cmp_type); } void nft_cmp_module_exit(void) { - nft_unregister_expr(&nft_cmp_ops); + nft_unregister_expr(&nft_cmp_type); } diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 33c5d36819bb..c89ee486ce54 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -78,25 +78,31 @@ static int nft_counter_init(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_ops nft_counter_ops __read_mostly = { - .name = "counter", +static struct nft_expr_type nft_counter_type; +static const struct nft_expr_ops nft_counter_ops = { + .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), - .policy = nft_counter_policy, - .maxattr = NFTA_COUNTER_MAX, - .owner = THIS_MODULE, .eval = nft_counter_eval, .init = nft_counter_init, .dump = nft_counter_dump, }; +static struct nft_expr_type nft_counter_type __read_mostly = { + .name = "counter", + .ops = &nft_counter_ops, + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, +}; + static int __init nft_counter_module_init(void) { - return nft_register_expr(&nft_counter_ops); + return nft_register_expr(&nft_counter_type); } static void __exit nft_counter_module_exit(void) { - nft_unregister_expr(&nft_counter_ops); + nft_unregister_expr(&nft_counter_type); } module_init(nft_counter_module_init); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a1756d678226..955f4e6e7089 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright 
(c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -222,26 +222,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_ct_ops __read_mostly = { - .name = "ct", +static struct nft_expr_type nft_ct_type; +static const struct nft_expr_ops nft_ct_ops = { + .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), - .owner = THIS_MODULE, .eval = nft_ct_eval, .init = nft_ct_init, .destroy = nft_ct_destroy, .dump = nft_ct_dump, +}; + +static struct nft_expr_type nft_ct_type __read_mostly = { + .name = "ct", + .ops = &nft_ct_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, + .owner = THIS_MODULE, }; static int __init nft_ct_module_init(void) { - return nft_register_expr(&nft_ct_ops); + return nft_register_expr(&nft_ct_type); } static void __exit nft_ct_module_exit(void) { - nft_unregister_expr(&nft_ct_ops); + nft_unregister_expr(&nft_ct_type); } module_init(nft_ct_module_init); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c index 9fc8eb308193..b6eed4d5a096 100644 --- a/net/netfilter/nft_expr_template.c +++ b/net/netfilter/nft_expr_template.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,7 +33,7 @@ static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { static int nft_template_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr *tb[]) + const struct nlattr * const tb[]) { struct nft_template *priv = nft_expr_priv(expr); @@ -58,26 +58,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops template_ops __read_mostly = { - .name = "template", +static struct nft_expr_type nft_template_type; +static const struct nft_expr_ops 
nft_template_ops = { + .type = &nft_template_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), - .owner = THIS_MODULE, .eval = nft_template_eval, .init = nft_template_init, .destroy = nft_template_destroy, .dump = nft_template_dump, +}; + +static struct nft_expr_type nft_template_type __read_mostly = { + .name = "template", + .ops = &nft_template_ops, .policy = nft_template_policy, .maxattr = NFTA_TEMPLATE_MAX, + .owner = THIS_MODULE, }; static int __init nft_template_module_init(void) { - return nft_register_expr(&template_ops); + return nft_register_expr(&nft_template_type); } static void __exit nft_template_module_exit(void) { - nft_unregister_expr(&template_ops); + nft_unregister_expr(&nft_template_type); } module_init(nft_template_module_init); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 21c6a6b7b662..8e0bb75e7c51 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -98,25 +98,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops exthdr_ops __read_mostly = { - .name = "exthdr", +static struct nft_expr_type nft_exthdr_type; +static const struct nft_expr_ops nft_exthdr_ops = { + .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), - .owner = THIS_MODULE, .eval = nft_exthdr_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, +}; + +static struct nft_expr_type nft_exthdr_type __read_mostly = { + .name = "exthdr", + .ops = &nft_exthdr_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, + .owner = THIS_MODULE, }; static int __init nft_exthdr_module_init(void) { - return nft_register_expr(&exthdr_ops); + return nft_register_expr(&nft_exthdr_type); } static void __exit nft_exthdr_module_exit(void) { - nft_unregister_expr(&exthdr_ops); + nft_unregister_expr(&nft_exthdr_type); } module_init(nft_exthdr_module_init); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 78334bf37007..1bfeeaf865b6 100644 --- a/net/netfilter/nft_immediate.c 
+++ b/net/netfilter/nft_immediate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -100,25 +100,31 @@ static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *e return NULL; } -static struct nft_expr_ops nft_imm_ops __read_mostly = { - .name = "immediate", +static struct nft_expr_type nft_imm_type; +static const struct nft_expr_ops nft_imm_ops = { + .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), - .owner = THIS_MODULE, .eval = nft_immediate_eval, .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .get_verdict = nft_immediate_get_verdict, +}; + +static struct nft_expr_type nft_imm_type __read_mostly = { + .name = "immediate", + .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, + .owner = THIS_MODULE, }; int __init nft_immediate_module_init(void) { - return nft_register_expr(&nft_imm_ops); + return nft_register_expr(&nft_imm_type); } void nft_immediate_module_exit(void) { - nft_unregister_expr(&nft_imm_ops); + nft_unregister_expr(&nft_imm_type); } diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index e0e3fc8aebc3..85da5bd02f64 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -84,25 +84,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_limit_ops __read_mostly = { - .name = "limit", +static struct nft_expr_type nft_limit_type; +static const struct nft_expr_ops nft_limit_ops = { + .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct 
nft_limit)), - .owner = THIS_MODULE, .eval = nft_limit_eval, .init = nft_limit_init, .dump = nft_limit_dump, +}; + +static struct nft_expr_type nft_limit_type __read_mostly = { + .name = "limit", + .ops = &nft_limit_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, + .owner = THIS_MODULE, }; static int __init nft_limit_module_init(void) { - return nft_register_expr(&nft_limit_ops); + return nft_register_expr(&nft_limit_type); } static void __exit nft_limit_module_exit(void) { - nft_unregister_expr(&nft_limit_ops); + nft_unregister_expr(&nft_limit_type); } module_init(nft_limit_module_init); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index da495c3b1e7e..57cad072a13e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -110,26 +110,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_log_ops __read_mostly = { - .name = "log", +static struct nft_expr_type nft_log_type; +static const struct nft_expr_ops nft_log_ops = { + .type = &nft_log_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), - .owner = THIS_MODULE, .eval = nft_log_eval, .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, +}; + +static struct nft_expr_type nft_log_type __read_mostly = { + .name = "log", + .ops = &nft_log_ops, .policy = nft_log_policy, .maxattr = NFTA_LOG_MAX, + .owner = THIS_MODULE, }; static int __init nft_log_module_init(void) { - return nft_register_expr(&nft_log_ops); + return nft_register_expr(&nft_log_type); } static void __exit nft_log_module_exit(void) { - nft_unregister_expr(&nft_log_ops); + nft_unregister_expr(&nft_log_type); } module_init(nft_log_module_init); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 
4962d2173678..8a6116b75b5a 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -112,24 +112,30 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_lookup_ops __read_mostly = { - .name = "lookup", +static struct nft_expr_type nft_lookup_type; +static const struct nft_expr_ops nft_lookup_ops = { + .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), - .owner = THIS_MODULE, .eval = nft_lookup_eval, .init = nft_lookup_init, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, +}; + +static struct nft_expr_type nft_lookup_type __read_mostly = { + .name = "lookup", + .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, + .owner = THIS_MODULE, }; int __init nft_lookup_module_init(void) { - return nft_register_expr(&nft_lookup_ops); + return nft_register_expr(&nft_lookup_type); } void nft_lookup_module_exit(void) { - nft_unregister_expr(&nft_lookup_ops); + nft_unregister_expr(&nft_lookup_type); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 96735aa2f039..8c28220a90b3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -193,25 +193,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_meta_ops __read_mostly = { - .name = "meta", +static struct nft_expr_type nft_meta_type; +static const struct nft_expr_ops nft_meta_ops = { + .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, .eval = nft_meta_eval, .init = nft_meta_init, .dump = nft_meta_dump, +}; + +static struct nft_expr_type nft_meta_type __read_mostly = { + .name = "meta", + .ops = &nft_meta_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, }; static int 
__init nft_meta_module_init(void) { - return nft_register_expr(&nft_meta_ops); + return nft_register_expr(&nft_meta_type); } static void __exit nft_meta_module_exit(void) { - nft_unregister_expr(&nft_meta_ops); + nft_unregister_expr(&nft_meta_type); } module_init(nft_meta_module_init); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 329f134b3f89..d99db6e37fb1 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -115,23 +115,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_payload_ops __read_mostly = { - .name = "payload", +static struct nft_expr_type nft_payload_type; +static const struct nft_expr_ops nft_payload_ops = { + .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), - .owner = THIS_MODULE, .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, +}; + +static struct nft_expr_type nft_payload_type __read_mostly = { + .name = "payload", + .ops = &nft_payload_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, + .owner = THIS_MODULE, }; int __init nft_payload_module_init(void) { - return nft_register_expr(&nft_payload_ops); + return nft_register_expr(&nft_payload_type); } void nft_payload_module_exit(void) { - nft_unregister_expr(&nft_payload_ops); + nft_unregister_expr(&nft_payload_type); } From cb7dbfd0390c9e244339f3270fe8649568241812 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 23:35:40 +0200 Subject: [PATCH 06/17] netfilter: nf_tables: add optimized data comparison for small values Add an optimized version of nft_data_cmp() that only handles values of up to 4 bytes in length.
This patch includes original Patrick McHardy's patch entitled (nf_tables: inline nft_cmp_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 8 ++ net/netfilter/nf_tables_core.c | 18 +++- net/netfilter/nft_cmp.c | 116 ++++++++++++++++++++----- 3 files changed, 118 insertions(+), 24 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 283396c916e0..3df6a9be3bdd 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,14 @@ extern void nf_tables_core_module_exit(void); extern int nft_immediate_module_init(void); extern void nft_immediate_module_exit(void); +struct nft_cmp_fast_expr { + u32 data; + enum nft_registers sreg:8; + u8 len; +}; + +extern const struct nft_expr_ops nft_cmp_fast_ops; + extern int nft_cmp_module_init(void); extern void nft_cmp_module_exit(void); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fd0ecd3255c1..24000182c8e7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,6 +20,18 @@ #include #include +static void nft_cmp_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1]) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + u32 mask; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); + if ((data[priv->sreg].data[0] & mask) == priv->data) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -48,7 +60,11 @@ next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - expr->ops->eval(expr, data, &pkt); + if (expr->ops == &nft_cmp_fast_ops) + nft_cmp_fast_eval(expr, data); + else + expr->ops->eval(expr, 
data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2c9d5fef2e63..37134f3e84fb 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -75,32 +75,11 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct nft_data_desc desc; int err; - if (tb[NFTA_CMP_SREG] == NULL || - tb[NFTA_CMP_OP] == NULL || - tb[NFTA_CMP_DATA] == NULL) - return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); - switch (priv->op) { - case NFT_CMP_EQ: - case NFT_CMP_NEQ: - case NFT_CMP_LT: - case NFT_CMP_LTE: - case NFT_CMP_GT: - case NFT_CMP_GTE: - break; - default: - return -EINVAL; - } err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); - if (err < 0) - return err; + BUG_ON(err < 0); priv->len = desc.len; return 0; @@ -133,9 +112,100 @@ static const struct nft_expr_ops nft_cmp_ops = { .dump = nft_cmp_dump, }; +static int nft_cmp_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + struct nft_data data; + u32 mask; + int err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + BUG_ON(err < 0); + desc.len *= BITS_PER_BYTE; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + priv->data = data.data[0] & mask; + priv->len = desc.len; + return 0; +} + +static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + goto nla_put_failure; 
+ + data.data[0] = priv->data; + if (nft_data_dump(skb, NFTA_CMP_DATA, &data, + NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +const struct nft_expr_ops nft_cmp_fast_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_cmp_fast_init, + .dump = nft_cmp_fast_dump, +}; + +static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +{ + struct nft_data_desc desc; + struct nft_data data; + enum nft_registers sreg; + enum nft_cmp_ops op; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return ERR_PTR(-EINVAL); + + sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(sreg); + if (err < 0) + return ERR_PTR(err); + + op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return ERR_PTR(-EINVAL); + } + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return ERR_PTR(err); + + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + return &nft_cmp_fast_ops; + else + return &nft_cmp_ops; +} + static struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", - .ops = &nft_cmp_ops, + .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, From c29b72e02573b8fe5e6cae5d192a6a4772e7bbd6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:06:41 +0200 Subject: [PATCH 07/17] netfilter: nft_payload: add optimized payload implementation for small loads Add an optimized payload expression implementation for small (up to 4 bytes) aligned data loads from the linear packet area. 
This patch also includes original Patrick McHardy's patch entitled (nf_tables: inline nft_payload_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 ++++ net/netfilter/nf_tables_core.c | 31 +++++++++++- net/netfilter/nft_payload.c | 69 ++++++++++++++++---------- 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 3df6a9be3bdd..fe7b16206a4e 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -27,6 +27,15 @@ extern void nft_bitwise_module_exit(void); extern int nft_byteorder_module_init(void); extern void nft_byteorder_module_exit(void); +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +extern const struct nft_expr_ops nft_payload_fast_ops; + extern int nft_payload_module_init(void); extern void nft_payload_module_exit(void); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 24000182c8e7..9aede59ed2d7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -32,6 +32,34 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static bool nft_payload_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + unsigned char *ptr; + + if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) + ptr = skb_network_header(skb); + else + ptr = skb_transport_header(skb); + + ptr += priv->offset; + + if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + return false; + + if (priv->len == 2) + *(u16 *)dest->data = *(u16 *)ptr; + else if (priv->len == 4) + *(u32 *)dest->data = *(u32 
*)ptr; + else + *(u8 *)dest->data = *(u8 *)ptr; + return true; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -62,7 +90,8 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); - else + else if (expr->ops != &nft_payload_fast_ops || + !nft_payload_fast_eval(expr, data, &pkt)) expr->ops->eval(expr, data, &pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index d99db6e37fb1..7cf13f7e1e94 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -17,13 +17,6 @@ #include #include -struct nft_payload { - enum nft_payload_bases base:8; - u8 offset; - u8 len; - enum nft_registers dreg:8; -}; - static void nft_payload_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) @@ -71,27 +64,9 @@ static int nft_payload_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); int err; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || - tb[NFTA_PAYLOAD_OFFSET] == NULL || - tb[NFTA_PAYLOAD_LEN] == NULL) - return -EINVAL; - - priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); - switch (priv->base) { - case NFT_PAYLOAD_LL_HEADER: - case NFT_PAYLOAD_NETWORK_HEADER: - case NFT_PAYLOAD_TRANSPORT_HEADER: - break; - default: - return -EOPNOTSUPP; - } - + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - if (priv->len == 0 || - priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); err = nft_validate_output_register(priv->dreg); @@ -124,9 +99,49 @@ static const struct nft_expr_ops nft_payload_ops = { .dump = nft_payload_dump, }; +const struct nft_expr_ops 
nft_payload_fast_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, +}; + +static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +{ + enum nft_payload_bases base; + unsigned int offset, len; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return ERR_PTR(-EINVAL); + + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) + return ERR_PTR(-EINVAL); + + if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + return &nft_payload_fast_ops; + else + return &nft_payload_ops; +} + static struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", - .ops = &nft_payload_ops, + .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, From 9370761c56b66aa5c65e069a7b010111a025018d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:21:26 +0200 Subject: [PATCH 08/17] netfilter: nf_tables: convert built-in tables/chains to chain types This patch converts built-in tables/chains to chain types that allows you to deploy customized table and chain configurations from userspace. After this patch, you have to specify the chain type when creating a new chain: add chain ip filter output { type filter hook input priority 0; } ^^^^ ------ The existing chain types after this patch are: filter, route and nat. 
Note that tables are just containers of chains with no specific semantics, which is a significant change with regards to iptables. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 31 ++- include/uapi/linux/netfilter/nf_tables.h | 2 + net/ipv4/netfilter/Kconfig | 8 +- net/ipv4/netfilter/Makefile | 4 +- net/ipv4/netfilter/nf_tables_ipv4.c | 21 ++ ..._table_nat_ipv4.c => nft_chain_nat_ipv4.c} | 116 +++-------- ...le_route_ipv4.c => nft_chain_route_ipv4.c} | 43 ++-- net/ipv6/netfilter/Kconfig | 4 +- net/ipv6/netfilter/Makefile | 2 +- net/ipv6/netfilter/nf_tables_ipv6.c | 22 +- ...le_route_ipv6.c => nft_chain_route_ipv6.c} | 45 ++-- net/netfilter/nf_tables_api.c | 197 ++++++++---------- 12 files changed, 221 insertions(+), 274 deletions(-) rename net/ipv4/netfilter/{nf_table_nat_ipv4.c => nft_chain_nat_ipv4.c} (76%) rename net/ipv4/netfilter/{nf_table_route_ipv4.c => nft_chain_route_ipv4.c} (61%) rename net/ipv6/netfilter/{nf_table_route_ipv6.c => nft_chain_route_ipv6.c} (65%) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66d0359702c6..8403f7f52e81 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -336,7 +336,6 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_BUILTIN = 0x2, }; /** @@ -362,14 +361,23 @@ struct nft_chain { char name[NFT_CHAIN_MAXNAMELEN]; }; +enum nft_chain_type { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @type: chain type * @chain: the chain */ struct nft_base_chain { struct nf_hook_ops ops; + enum nft_chain_type type; struct nft_chain chain; }; @@ -384,10 +392,6 @@ extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)); -enum nft_table_flags { - NFT_TABLE_BUILTIN = 
0x1, -}; - /** * struct nft_table - nf_tables table * @@ -431,8 +435,17 @@ struct nft_af_info { extern int nft_register_afinfo(struct nft_af_info *); extern void nft_unregister_afinfo(struct nft_af_info *); -extern int nft_register_table(struct nft_table *, int family); -extern void nft_unregister_table(struct nft_table *, int family); +struct nf_chain_type { + unsigned int hook_mask; + const char *name; + enum nft_chain_type type; + nf_hookfn *fn[NF_MAX_HOOKS]; + struct module *me; + int family; +}; + +extern int nft_register_chain_type(struct nf_chain_type *); +extern void nft_unregister_chain_type(struct nf_chain_type *); extern int nft_register_expr(struct nft_expr_type *); extern void nft_unregister_expr(struct nft_expr_type *); @@ -440,8 +453,8 @@ extern void nft_unregister_expr(struct nft_expr_type *); #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) -#define MODULE_ALIAS_NFT_TABLE(family, name) \ - MODULE_ALIAS("nft-table-" __stringify(family) "-" name) +#define MODULE_ALIAS_NFT_CHAIN(family, name) \ + MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) #define MODULE_ALIAS_NFT_EXPR(name) \ MODULE_ALIAS("nft-expr-" name) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9e924014efe3..779cf951c8de 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,6 +115,7 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_TYPE: type name of the chain (NLA_NUL_STRING) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -122,6 +123,7 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_TYPE, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) diff --git 
a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index eb1d56ece361..ae65fe98bfbe 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -44,13 +44,13 @@ config NFT_REJECT_IPV4 depends on NF_TABLES_IPV4 tristate "nf_tables IPv4 reject support" -config NF_TABLE_ROUTE_IPV4 +config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 - tristate "IPv4 nf_tables route table support" + tristate "IPv4 nf_tables route chain support" -config NF_TABLE_NAT_IPV4 +config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 - tristate "IPv4 nf_tables nat table support" + tristate "IPv4 nf_tables nat chain support" config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b2f01cd2cd65..91e0bd71a6d3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -29,8 +29,8 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o -obj-$(CONFIG_NF_TABLE_ROUTE_IPV4) += nf_table_route_ipv4.o -obj-$(CONFIG_NF_TABLE_NAT_IPV4) += nf_table_nat_ipv4.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 63d0a3bf53d3..23525c4c0192 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012-2013 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,14 +42,34 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; +static struct nf_chain_type filter_ipv4 = { + .family = NFPROTO_IPV4, + .name = "filter", + .type = 
NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain, + [NF_INET_LOCAL_OUT] = nft_do_chain, + [NF_INET_FORWARD] = nft_do_chain, + [NF_INET_PRE_ROUTING] = nft_do_chain, + [NF_INET_POST_ROUTING] = nft_do_chain, + }, +}; + static int __init nf_tables_ipv4_init(void) { + nft_register_chain_type(&filter_ipv4); return nft_register_afinfo(&nft_af_ipv4); } static void __exit nf_tables_ipv4_exit(void) { nft_unregister_afinfo(&nft_af_ipv4); + nft_unregister_chain_type(&filter_ipv4); } module_init(nf_tables_ipv4_init); diff --git a/net/ipv4/netfilter/nf_table_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c similarity index 76% rename from net/ipv4/netfilter/nf_table_nat_ipv4.c rename to net/ipv4/netfilter/nft_chain_nat_ipv4.c index 2ecce39077a3..cd286306be85 100644 --- a/net/ipv4/netfilter/nf_table_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -167,7 +168,7 @@ static struct nft_expr_type nft_nat_type __read_mostly = { }; /* - * NAT table + * NAT chains */ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, @@ -301,115 +302,52 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops, return ret; } -static struct nft_base_chain nf_chain_nat_prerouting __read_mostly = { - .chain = { - .name = "PREROUTING", - .rules = LIST_HEAD_INIT(nf_chain_nat_prerouting.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_prerouting, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, - .priv = &nf_chain_nat_prerouting.chain, +struct nf_chain_type 
nft_chain_nat_ipv4 = { + .family = NFPROTO_IPV4, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_output, + [NF_INET_LOCAL_IN] = nf_nat_fn, }, + .me = THIS_MODULE, }; -static struct nft_base_chain nf_chain_nat_postrouting __read_mostly = { - .chain = { - .name = "POSTROUTING", - .rules = LIST_HEAD_INIT(nf_chain_nat_postrouting.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_postrouting, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, - .priv = &nf_chain_nat_postrouting.chain, - }, -}; - -static struct nft_base_chain nf_chain_nat_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = LIST_HEAD_INIT(nf_chain_nat_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_output, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, - .priv = &nf_chain_nat_output.chain, - }, -}; - -static struct nft_base_chain nf_chain_nat_input __read_mostly = { - .chain = { - .name = "INPUT", - .rules = LIST_HEAD_INIT(nf_chain_nat_input.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_nat_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, - .priv = &nf_chain_nat_input.chain, - }, -}; - - -static struct nft_table nf_table_nat_ipv4 __read_mostly = { - .name = "nat", - .chains = LIST_HEAD_INIT(nf_table_nat_ipv4.chains), -}; - -static int __init nf_table_nat_init(void) +static int __init nft_chain_nat_init(void) { int err; - list_add_tail(&nf_chain_nat_prerouting.chain.list, - &nf_table_nat_ipv4.chains); - 
list_add_tail(&nf_chain_nat_postrouting.chain.list, - &nf_table_nat_ipv4.chains); - list_add_tail(&nf_chain_nat_output.chain.list, - &nf_table_nat_ipv4.chains); - list_add_tail(&nf_chain_nat_input.chain.list, - &nf_table_nat_ipv4.chains); - - err = nft_register_table(&nf_table_nat_ipv4, NFPROTO_IPV4); + err = nft_register_chain_type(&nft_chain_nat_ipv4); if (err < 0) - goto err1; + return err; err = nft_register_expr(&nft_nat_type); if (err < 0) - goto err2; + goto err; return 0; -err2: - nft_unregister_table(&nf_table_nat_ipv4, NFPROTO_IPV4); -err1: +err: + nft_unregister_chain_type(&nft_chain_nat_ipv4); return err; } -static void __exit nf_table_nat_exit(void) +static void __exit nft_chain_nat_exit(void) { nft_unregister_expr(&nft_nat_type); - nft_unregister_table(&nf_table_nat_ipv4, AF_INET); + nft_unregister_chain_type(&nft_chain_nat_ipv4); } -module_init(nf_table_nat_init); -module_exit(nf_table_nat_exit); +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET, "nat"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv4/netfilter/nf_table_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c similarity index 61% rename from net/ipv4/netfilter/nf_table_route_ipv4.c rename to net/ipv4/netfilter/nft_chain_route_ipv4.c index 4f257a1ed661..6b84e097b8fc 100644 --- a/net/ipv4/netfilter/nf_table_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -56,42 +57,30 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nft_base_chain nf_chain_route_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = 
LIST_HEAD_INIT(nf_chain_route_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_route_table_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - .priv = &nf_chain_route_output.chain, +static struct nf_chain_type nft_chain_route_ipv4 = { + .family = NFPROTO_IPV4, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, + .me = THIS_MODULE, }; -static struct nft_table nf_table_route_ipv4 __read_mostly = { - .name = "route", - .chains = LIST_HEAD_INIT(nf_table_route_ipv4.chains), -}; - -static int __init nf_table_route_init(void) +static int __init nft_chain_route_init(void) { - list_add_tail(&nf_chain_route_output.chain.list, - &nf_table_route_ipv4.chains); - return nft_register_table(&nf_table_route_ipv4, NFPROTO_IPV4); + return nft_register_chain_type(&nft_chain_route_ipv4); } -static void __exit nf_table_route_exit(void) +static void __exit nft_chain_route_exit(void) { - nft_unregister_table(&nf_table_route_ipv4, NFPROTO_IPV4); + nft_unregister_chain_type(&nft_chain_route_ipv4); } -module_init(nf_table_route_init); -module_exit(nf_table_route_exit); +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET, "route"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 5677e38eeca3..23833064b7b5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -29,9 +29,9 @@ config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" -config NF_TABLE_ROUTE_IPV6 +config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 - tristate "IPv6 nf_tables route table support" + tristate "IPv6 nf_tables route chain support" config IP6_NF_IPTABLES tristate "IP6 tables support (required for 
filtering)" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 956af4492d10..be4913aa524d 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o -obj-$(CONFIG_NF_TABLE_ROUTE_IPV6) += nf_table_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index e0717cea4913..3631d6238e6f 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012-2013 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,14 +40,33 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static struct nf_chain_type filter_ipv6 = { + .family = NFPROTO_IPV6, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .fn = { + [NF_INET_LOCAL_IN] = nft_do_chain, + [NF_INET_LOCAL_OUT] = nft_do_chain, + [NF_INET_FORWARD] = nft_do_chain, + [NF_INET_PRE_ROUTING] = nft_do_chain, + [NF_INET_POST_ROUTING] = nft_do_chain, + }, +}; + static int __init nf_tables_ipv6_init(void) { + nft_register_chain_type(&filter_ipv6); return nft_register_afinfo(&nft_af_ipv6); } - static void __exit nf_tables_ipv6_exit(void) { nft_unregister_afinfo(&nft_af_ipv6); + nft_unregister_chain_type(&filter_ipv6); } module_init(nf_tables_ipv6_init); diff --git a/net/ipv6/netfilter/nf_table_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c similarity index 65% rename from net/ipv6/netfilter/nf_table_route_ipv6.c rename 
to net/ipv6/netfilter/nft_chain_route_ipv6.c index 48ac65c7b398..4cdc992fa067 100644 --- a/net/ipv6/netfilter/nf_table_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -52,42 +53,30 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nft_base_chain nf_chain_route_output __read_mostly = { - .chain = { - .name = "OUTPUT", - .rules = LIST_HEAD_INIT(nf_chain_route_output.chain.rules), - .flags = NFT_BASE_CHAIN | NFT_CHAIN_BUILTIN, - }, - .ops = { - .hook = nf_route_table_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_MANGLE, - .priv = &nf_chain_route_output.chain, - }, +static struct nf_chain_type nft_chain_route_ipv6 = { + .family = NFPROTO_IPV6, + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .fn = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, + .me = THIS_MODULE, }; -static struct nft_table nf_table_route_ipv6 __read_mostly = { - .name = "route", - .chains = LIST_HEAD_INIT(nf_table_route_ipv6.chains), -}; - -static int __init nf_table_route_init(void) +static int __init nft_chain_route_init(void) { - list_add_tail(&nf_chain_route_output.chain.list, - &nf_table_route_ipv6.chains); - return nft_register_table(&nf_table_route_ipv6, NFPROTO_IPV6); + return nft_register_chain_type(&nft_chain_route_ipv6); } -static void __exit nf_table_route_exit(void) +static void __exit nft_chain_route_exit(void) { - nft_unregister_table(&nf_table_route_ipv6, NFPROTO_IPV6); + nft_unregister_chain_type(&nft_chain_route_ipv6); } -module_init(nf_table_route_init); -module_exit(nf_table_route_exit); +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); 
MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_TABLE(AF_INET6, "route"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6dac9a3c9c40..9c2d8d5af843 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -104,8 +104,7 @@ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) + const struct nlattr *nla) { struct nft_table *table; @@ -116,16 +115,6 @@ static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, if (table != NULL) return table; -#ifdef CONFIG_MODULES - if (autoload) { - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-table-%u-%*.s", afi->family, - nla_len(nla)-1, (const char *)nla_data(nla)); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (nft_table_lookup(afi, nla)) - return ERR_PTR(-EAGAIN); - } -#endif return ERR_PTR(-ENOENT); } @@ -134,6 +123,39 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } +static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; + +static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +{ + int i; + + for (i=0; i<NFT_CHAIN_T_MAX; i++) { + if (chain_type[family][i] != NULL && + !nla_strcmp(nla, chain_type[family][i]->name)) + return i; + } + return -1; +} + +static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + int type; + + type = __nf_tables_chain_type_lookup(afi->family, nla); +#ifdef CONFIG_MODULES + if (type < 0 && autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-chain-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + type = __nf_tables_chain_type_lookup(afi->family, nla); + } +#endif + return type; +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = 
NLA_STRING }, }; @@ -258,7 +280,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); @@ -294,7 +316,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name, false); + table = nf_tables_table_lookup(afi, name); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -335,13 +357,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_BUILTIN) - return -EOPNOTSUPP; - if (table->use) return -EBUSY; @@ -351,99 +370,34 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, return 0; } -static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, - const char *name) +int nft_register_chain_type(struct nf_chain_type *ctype) { - struct nft_table *table; - - list_for_each_entry(table, &afi->tables, list) { - if (!strcmp(name, table->name)) - return table; - } - - return ERR_PTR(-ENOENT); -} - -static int nf_tables_chain_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, - int event, int family); - -/** - * nft_register_table - register a built-in table - * - * @table: the table to register - * @family: protocol family to register table with - * - * Register a built-in table for use with nf_tables. Returns zero on - * success or a negative errno code otherwise. 
- */ -int nft_register_table(struct nft_table *table, int family) -{ - struct nft_af_info *afi; - struct nft_table *t; - struct nft_chain *chain; - int err; + int err = 0; nfnl_lock(NFNL_SUBSYS_NFTABLES); -again: - afi = nf_tables_afinfo_lookup(family, true); - if (IS_ERR(afi)) { - err = PTR_ERR(afi); - if (err == -EAGAIN) - goto again; - goto err; + if (chain_type[ctype->family][ctype->type] != NULL) { + err = -EBUSY; + goto out; } - t = __nf_tables_table_lookup(afi, table->name); - if (IS_ERR(t)) { - err = PTR_ERR(t); - if (err != -ENOENT) - goto err; - t = NULL; - } + if (!try_module_get(ctype->me)) + goto out; - if (t != NULL) { - err = -EEXIST; - goto err; - } - - table->flags |= NFT_TABLE_BUILTIN; - INIT_LIST_HEAD(&table->sets); - list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_NEWCHAIN, family); - err = 0; -err: + chain_type[ctype->family][ctype->type] = ctype; +out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); return err; } -EXPORT_SYMBOL_GPL(nft_register_table); +EXPORT_SYMBOL_GPL(nft_register_chain_type); -/** - * nft_unregister_table - unregister a built-in table - * - * @table: the table to unregister - * @family: protocol family to unregister table with - * - * Unregister a built-in table for use with nf_tables. 
- */ -void nft_unregister_table(struct nft_table *table, int family) +void nft_unregister_chain_type(struct nf_chain_type *ctype) { - struct nft_chain *chain; - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&table->list); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_DELCHAIN, family); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + chain_type[ctype->family][ctype->type] = NULL; + module_put(ctype->me); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } -EXPORT_SYMBOL_GPL(nft_unregister_table); +EXPORT_SYMBOL_GPL(nft_unregister_chain_type); /* * Chains @@ -484,6 +438,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -526,6 +481,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; nla_nest_end(skb, nest); + + if (nla_put_string(skb, NFTA_CHAIN_TYPE, + chain_type[ops->pf][nft_base_chain(chain)->type]->name)) + goto nla_put_failure; } return nlmsg_end(skb, nlh); @@ -633,7 +592,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -680,7 +639,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -722,6 +681,17 @@ static int 
nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_HOOK]) { struct nf_hook_ops *ops; + nf_hookfn *hookfn; + u32 hooknum; + int type = NFT_CHAIN_T_DEFAULT; + + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, + nla[NFTA_CHAIN_TYPE], + create); + if (type < 0) + return -ENOENT; + } err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], nft_hook_policy); @@ -730,12 +700,20 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (ha[NFTA_HOOK_HOOKNUM] == NULL || ha[NFTA_HOOK_PRIORITY] == NULL) return -EINVAL; - if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + + hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hooknum >= afi->nhooks) return -EINVAL; + hookfn = chain_type[family][type]->fn[hooknum]; + if (hookfn == NULL) + return -EOPNOTSUPP; + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) return -ENOMEM; + + basechain->type = type; chain = &basechain->chain; ops = &basechain->ops; @@ -744,7 +722,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); ops->priv = chain; - ops->hook = nft_do_chain; + ops->hook = hookfn; if (afi->hooks[ops->hooknum]) ops->hook = afi->hooks[ops->hooknum]; @@ -793,7 +771,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -801,9 +779,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_BUILTIN) - return -EOPNOTSUPP; - if (!list_empty(&chain->rules)) return -EBUSY; @@ -1190,7 +1165,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff 
*skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1268,7 +1243,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1374,7 +1349,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1490,7 +1465,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(afi); if (nla[NFTA_SET_TABLE] != NULL) { - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); } @@ -1820,7 +1795,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -2008,7 +1983,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); From 0ca743a5599199152a31a7146b83213c786c2eb2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 00:06:06 +0200 Subject: [PATCH 09/17] netfilter: nf_tables: add compatibility layer for x_tables This patch adds the x_tables compatibility layer. 
This allows you to use existing x_tables matches and targets from nf_tables. This compatibility layer allows us to use existing matches/targets for features that are still missing in nf_tables. We can progressively replace them with native nf_tables extensions. It also provides the userspace compatibility software that allows you to express the rule-set using the iptables syntax but using the nf_tables kernel components. In order to get this compatibility layer working, I've done the following things: * add NFNL_SUBSYS_NFT_COMPAT: this new nfnetlink subsystem is used to query the x_tables match/target revision, so we don't need to use the native x_tables getsockopt interface. * emulate xt structures: this required extending the struct nft_pktinfo to include the fragment offset, which is already obtained from ip[6]_tables and that is used by some matches/targets. * add support for default policy to base chains, required to emulate x_tables. * add NFTA_CHAIN_USE attribute to obtain the number of references to chains, required by x_tables emulation. * add chain packet/byte counters using per-cpu variables. * support 32-64 bits compat. For historical reasons, this patch includes the following patches that were posted to the netfilter-devel mailing list.
From Pablo Neira Ayuso: * nf_tables: add default policy to base chains * netfilter: nf_tables: add NFTA_CHAIN_USE attribute * nf_tables: nft_compat: private data of target and matches in contiguous area * nf_tables: validate hooks for compat match/target * nf_tables: nft_compat: release cached matches/targets * nf_tables: x_tables support as a compile time option * nf_tables: fix alias for xtables over nftables module * nf_tables: add packet and byte counters per chain * nf_tables: fix per-chain counter stats if no counters are passed * nf_tables: don't bump chain stats * nf_tables: add protocol and flags for xtables over nf_tables * nf_tables: add ip[6]t_entry emulation * nf_tables: move specific layer 3 compat code to nf_tables_ipv[4|6] * nf_tables: support 32bits-64bits x_tables compat * nf_tables: fix compilation if CONFIG_COMPAT is disabled From Patrick McHardy: * nf_tables: move policy to struct nft_base_chain * nf_tables: send notifications for base chain policy changes From Alexander Primak: * nf_tables: remove the duplicate NF_INET_LOCAL_OUT From Nicolas Dichtel: * nf_tables: fix compilation when nf-netlink is a module Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 44 +- include/net/netfilter/nf_tables_ipv4.h | 23 + include/net/netfilter/nf_tables_ipv6.h | 30 + include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_tables.h | 32 + .../uapi/linux/netfilter/nf_tables_compat.h | 38 + include/uapi/linux/netfilter/nfnetlink.h | 3 +- net/ipv4/netfilter/nf_tables_ipv4.c | 32 +- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 6 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 6 +- net/ipv6/netfilter/nf_tables_ipv6.c | 33 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 8 +- net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 1 + net/netfilter/nf_tables_api.c | 220 ++++- net/netfilter/nf_tables_core.c | 46 +- net/netfilter/nft_cmp.c | 3 +- net/netfilter/nft_compat.c | 768 ++++++++++++++++++ net/netfilter/nft_immediate.c | 12 +- 
net/netfilter/nft_payload.c | 4 +- 20 files changed, 1241 insertions(+), 78 deletions(-) create mode 100644 include/net/netfilter/nf_tables_ipv4.h create mode 100644 include/net/netfilter/nf_tables_ipv6.h create mode 100644 include/uapi/linux/netfilter/nf_tables_compat.h create mode 100644 net/netfilter/nft_compat.c diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8403f7f52e81..a68f45f0fe2e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -15,8 +16,23 @@ struct nft_pktinfo { u8 hooknum; u8 nhoff; u8 thoff; + /* for x_tables compatibility */ + struct xt_action_param xt; }; +static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + pkt->skb = skb; + pkt->in = pkt->xt.in = in; + pkt->out = pkt->xt.out = out; + pkt->hooknum = pkt->xt.hooknum = ops->hooknum; + pkt->xt.family = ops->pf; +} + struct nft_data { union { u32 data[4]; @@ -57,6 +73,7 @@ static inline void nft_data_debug(const struct nft_data *data) * @afi: address family info * @table: the table the chain is contained in * @chain: the chain the rule is contained in + * @nla: netlink attributes */ struct nft_ctx { const struct sk_buff *skb; @@ -64,6 +81,7 @@ struct nft_ctx { const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; + const struct nlattr * const *nla; }; struct nft_data_desc { @@ -235,7 +253,8 @@ extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @maxattr: highest netlink attribute number */ struct nft_expr_type { - const struct nft_expr_ops *(*select_ops)(const struct nlattr * const tb[]); + const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); const struct nft_expr_ops *ops; struct list_head list; const char *name; 
@@ -253,6 +272,8 @@ struct nft_expr_type { * @destroy: destruction function * @dump: function to dump parameters * @type: expression type + * @validate: validate expression, called during loop detection + * @data: extra data to attach to this expression operation */ struct nft_expr; struct nft_expr_ops { @@ -267,8 +288,11 @@ struct nft_expr_ops { void (*destroy)(const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); - const struct nft_data * (*get_verdict)(const struct nft_expr *expr); + int (*validate)(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); const struct nft_expr_type *type; + void *data; }; #define NFT_EXPR_MAXATTR 16 @@ -368,16 +392,25 @@ enum nft_chain_type { NFT_CHAIN_T_MAX }; +struct nft_stats { + u64 bytes; + u64 pkts; +}; + /** * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops * @type: chain type + * @policy: default policy + * @stats: per-cpu chain stats * @chain: the chain */ struct nft_base_chain { struct nf_hook_ops ops; enum nft_chain_type type; + u8 policy; + struct nft_stats __percpu *stats; struct nft_chain chain; }; @@ -386,11 +419,8 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } -extern unsigned int nft_do_chain(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)); +extern unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops); /** * struct nft_table - nf_tables table diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h new file mode 100644 index 000000000000..1be1c2c197ee --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -0,0 +1,23 @@ +#ifndef _NF_TABLES_IPV4_H_ +#define _NF_TABLES_IPV4_H_ + +#include +#include + +static inline void 
+nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + struct iphdr *ip; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + pkt->xt.thoff = ip_hdrlen(pkt->skb); + ip = ip_hdr(pkt->skb); + pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; +} + +#endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h new file mode 100644 index 000000000000..4a9b88a65963 --- /dev/null +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -0,0 +1,30 @@ +#ifndef _NF_TABLES_IPV6_H_ +#define _NF_TABLES_IPV6_H_ + +#include +#include + +static inline int +nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out) +{ + int protohdr, thoff = 0; + unsigned short frag_off; + + nft_set_pktinfo(pkt, ops, skb, in, out); + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + /* If malformed, drop it */ + if (protohdr < 0) + return -1; + + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +} + +#endif diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 6ce0b7f566a7..17c3af2c4bb9 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h header-y += nf_tables.h +header-y += nf_tables_compat.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 779cf951c8de..1563875e6942 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,7 +115,10 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of 
the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -123,7 +126,10 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) @@ -135,6 +141,7 @@ enum nft_chain_attributes { * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -142,10 +149,35 @@ enum nft_rule_attributes { NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + /** * enum 
nft_set_flags - nf_tables set flags * diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h new file mode 100644 index 000000000000..8310f5f76551 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables_compat.h @@ -0,0 +1,38 @@ +#ifndef _NFT_COMPAT_NFNETLINK_H_ +#define _NFT_COMPAT_NFNETLINK_H_ + +enum nft_target_attributes { + NFTA_TARGET_UNSPEC, + NFTA_TARGET_NAME, + NFTA_TARGET_REV, + NFTA_TARGET_INFO, + __NFTA_TARGET_MAX +}; +#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1) + +enum nft_match_attributes { + NFTA_MATCH_UNSPEC, + NFTA_MATCH_NAME, + NFTA_MATCH_REV, + NFTA_MATCH_INFO, + __NFTA_MATCH_MAX +}; +#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1) + +#define NFT_COMPAT_NAME_MAX 32 + +enum { + NFNL_MSG_COMPAT_GET, + NFNL_MSG_COMPAT_MAX +}; + +enum { + NFTA_COMPAT_UNSPEC = 0, + NFTA_COMPAT_NAME, + NFTA_COMPAT_REV, + NFTA_COMPAT_TYPE, + __NFTA_COMPAT_MAX, +}; +#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1) + +#endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index d276c3bd55b8..288959404d54 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -54,6 +54,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 -#define NFNL_SUBSYS_COUNT 11 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_COUNT 12 #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 23525c4c0192..c61cffb9b760 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -22,6 +24,8 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)) { 
+ struct nft_pktinfo pkt; + if (unlikely(skb->len < sizeof(struct iphdr) || ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { if (net_ratelimit()) @@ -29,8 +33,9 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, "packet\n"); return NF_ACCEPT; } + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); - return nft_do_chain(ops, skb, in, out, okfn); + return nft_do_chain_pktinfo(&pkt, ops); } static struct nft_af_info nft_af_ipv4 __read_mostly = { @@ -42,6 +47,21 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; + +static unsigned int +nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} + static struct nf_chain_type filter_ipv4 = { .family = NFPROTO_IPV4, .name = "filter", @@ -52,11 +72,11 @@ static struct nf_chain_type filter_ipv4 = { (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain, - [NF_INET_LOCAL_OUT] = nft_do_chain, - [NF_INET_FORWARD] = nft_do_chain, - [NF_INET_PRE_ROUTING] = nft_do_chain, - [NF_INET_POST_ROUTING] = nft_do_chain, + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index cd286306be85..e09c201adf84 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -181,6 +182,7 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conn_nat *nat; enum nf_nat_manip_type maniptype = 
HOOK2MANIP(ops->hooknum); + struct nft_pktinfo pkt; unsigned int ret; if (ct == NULL || nf_ct_is_untracked(ct)) @@ -213,7 +215,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, if (nf_nat_initialized(ct, maniptype)) break; - ret = nft_do_chain(ops, skb, in, out, okfn); + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 6b84e097b8fc..4e6bf9a3d7aa 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, int (*okfn)(struct sk_buff *)) { unsigned int ret; + struct nft_pktinfo pkt; u32 mark; __be32 saddr, daddr; u_int8_t tos; @@ -37,13 +39,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + mark = skb->mark; iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(ops, skb, in, out, okfn); + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 3631d6238e6f..42f905a808a3 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -14,6 +14,7 @@ #include #include #include +#include static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -21,14 +22,18 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nft_pktinfo pkt; + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if 
(net_ratelimit()) pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " "packet\n"); return NF_ACCEPT; } + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; - return nft_do_chain(ops, skb, in, out, okfn); + return nft_do_chain_pktinfo(&pkt, ops); } static struct nft_af_info nft_af_ipv6 __read_mostly = { @@ -40,6 +45,22 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static unsigned int +nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain_pktinfo(&pkt, ops); +} + static struct nf_chain_type filter_ipv6 = { .family = NFPROTO_IPV6, .name = "filter", @@ -50,11 +71,11 @@ static struct nf_chain_type filter_ipv6 = { (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain, - [NF_INET_LOCAL_OUT] = nft_do_chain, - [NF_INET_FORWARD] = nft_do_chain, - [NF_INET_PRE_ROUTING] = nft_do_chain, - [NF_INET_POST_ROUTING] = nft_do_chain, + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 4cdc992fa067..3fe40f0456ad 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -19,6 +19,7 @@ #include #include #include +#include #include static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, @@ -28,10 +29,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, int (*okfn)(struct sk_buff *)) { unsigned int ret; + struct nft_pktinfo pkt; struct in6_addr saddr, daddr; u_int8_t 
hop_limit; u32 mark, flowlabel; + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); @@ -41,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(ops, skb, in, out, okfn); + ret = nft_do_chain_pktinfo(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa184a46bbf3..49e362707379 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,15 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_COMPAT + depends on NF_TABLES + depends on NETFILTER_XTABLES + tristate "Netfilter x_tables over nf_tables module" + help + This is required if you intend to use any of existing + x_tables match/target extensions over the nf_tables + framework. 
+ config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b6b78754e4cc..a6781450b6fb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c2d8d5af843..61e017b349cb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -438,7 +438,9 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -446,6 +448,33 @@ static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, }; +static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) +{ + struct nft_stats *cpu_stats, total; + struct nlattr *nest; + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + total.pkts += cpu_stats->pkts; + total.bytes += cpu_stats->bytes; + } + nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + if (nest == NULL) + goto nla_put_failure; + + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + 
return 0; + +nla_put_failure: + return -ENOSPC; +} + static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, @@ -472,8 +501,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; if (chain->flags & NFT_BASE_CHAIN) { - const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; - struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + const struct nft_base_chain *basechain = nft_base_chain(chain); + const struct nf_hook_ops *ops = &basechain->ops; + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) @@ -482,11 +514,21 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest); + if (nla_put_be32(skb, NFTA_CHAIN_POLICY, + htonl(basechain->policy))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_TYPE, chain_type[ops->pf][nft_base_chain(chain)->type]->name)) goto nla_put_failure; + + if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + goto nla_put_failure; } + if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -617,6 +659,67 @@ err: return err; } +static int +nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) +{ + switch (ntohl(nla_get_be32(attr))) { + case NF_DROP: + chain->policy = NF_DROP; + break; + case NF_ACCEPT: + chain->policy = NF_ACCEPT; + break; + default: + return -EINVAL; + } + return 0; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int +nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +{ + struct nlattr *tb[NFTA_COUNTER_MAX+1]; + 
struct nft_stats __percpu *newstats; + struct nft_stats *stats; + int err; + + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + if (err < 0) + return err; + + if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) + return -EINVAL; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + /* Restore old counters on this cpu, no problem. Per-cpu statistics + * are not exposed to userspace. + */ + stats = this_cpu_ptr(newstats); + stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + + if (chain->stats) { + /* nfnl_lock is held, add some nfnl function for this, later */ + struct nft_stats __percpu *oldstats = + rcu_dereference_protected(chain->stats, 1); + + rcu_assign_pointer(chain->stats, newstats); + synchronize_rcu(); + free_percpu(oldstats); + } else + rcu_assign_pointer(chain->stats, newstats); + + return 0; +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -626,7 +729,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; - struct nft_base_chain *basechain; + struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; int family = nfmsg->nfgen_family; u64 handle = 0; @@ -673,6 +776,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) return -EEXIST; + if (nla[NFTA_CHAIN_POLICY]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_chain_policy(nft_base_chain(chain), + nla[NFTA_CHAIN_POLICY]); + if (err < 0) + return err; + } + + if (nla[NFTA_CHAIN_COUNTERS]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_counters(nft_base_chain(chain), + 
nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) + return err; + } + if (nla[NFTA_CHAIN_HANDLE] && name) nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -727,6 +850,36 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hook = afi->hooks[ops->hooknum]; chain->flags |= NFT_BASE_CHAIN; + + if (nla[NFTA_CHAIN_POLICY]) { + err = nf_tables_chain_policy(basechain, + nla[NFTA_CHAIN_POLICY]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else + basechain->policy = NF_ACCEPT; + + if (nla[NFTA_CHAIN_COUNTERS]) { + err = nf_tables_counters(basechain, + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else { + struct nft_stats __percpu *newstats; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + rcu_assign_pointer(nft_base_chain(chain)->stats, + newstats); + } } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -739,6 +892,15 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, list_add_tail(&chain->list, &table->chains); table->use++; + + if (chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -751,9 +913,10 @@ static void nf_tables_rcu_chain_destroy(struct rcu_head *head) BUG_ON(chain->use > 0); - if (chain->flags & NFT_BASE_CHAIN) + if (chain->flags & NFT_BASE_CHAIN) { + free_percpu(nft_base_chain(chain)->stats); kfree(nft_base_chain(chain)); - else + } else kfree(chain); } @@ -801,13 +964,15 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, - const struct nft_chain *chain) + const struct nft_chain *chain, + const struct nlattr * const *nla) { 
ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; + ctx->nla = nla; } /* @@ -910,7 +1075,8 @@ struct nft_expr_info { struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; -static int nf_tables_expr_parse(const struct nlattr *nla, +static int nf_tables_expr_parse(const struct nft_ctx *ctx, + const struct nlattr *nla, struct nft_expr_info *info) { const struct nft_expr_type *type; @@ -935,7 +1101,8 @@ static int nf_tables_expr_parse(const struct nlattr *nla, memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); if (type->select_ops != NULL) { - ops = type->select_ops((const struct nlattr * const *)info->tb); + ops = type->select_ops(ctx, + (const struct nlattr * const *)info->tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); goto err1; @@ -1012,6 +1179,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1269,6 +1437,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + n = 0; size = 0; if (nla[NFTA_RULE_EXPRESSIONS]) { @@ -1278,7 +1448,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, goto err1; if (n == NFT_RULE_MAXEXPRS) goto err1; - err = nf_tables_expr_parse(tmp, &info[n]); + err = nf_tables_expr_parse(&ctx, tmp, &info[n]); if (err < 0) goto err1; size += info[n].ops->size; @@ -1294,7 +1464,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1304,13 +1473,6 @@ static int nf_tables_newrule(struct 
sock *nlsk, struct sk_buff *skb, expr = nft_expr_next(expr); } - /* Register hook when first rule is inserted into a base chain */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hook(&nft_base_chain(chain)->ops); - if (err < 0) - goto err2; - } - if (nlh->nlmsg_flags & NLM_F_REPLACE) { list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); @@ -1379,10 +1541,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, } } - /* Unregister hook when last rule from base chain is deleted */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); - return 0; } @@ -1470,7 +1628,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(table); } - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -1799,7 +1957,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) { @@ -1987,7 +2145,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -2435,23 +2593,27 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, { const struct nft_rule *rule; const struct nft_expr *expr, *last; - const struct nft_data *data; const struct nft_set *set; struct nft_set_binding *binding; struct nft_set_iter iter; - int err; if (ctx->chain == chain) return -ELOOP; list_for_each_entry(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - if (!expr->ops->get_verdict) + const struct nft_data *data = NULL; + int err; + + if (!expr->ops->validate) 
continue; - data = expr->ops->get_verdict(expr); + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; + if (data == NULL) - break; + continue; switch (data->verdict) { case NFT_JUMP: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 9aede59ed2d7..e51a45c12128 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -60,27 +60,34 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, return true; } -unsigned int nft_do_chain(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +struct nft_jumpstack { + const struct nft_chain *chain; + const struct nft_rule *rule; +}; + +static inline void +nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, + struct nft_jumpstack *jumpstack, unsigned int stackptr) +{ + struct nft_stats __percpu *stats; + const struct nft_chain *chain = stackptr ? 
jumpstack[0].chain : this; + + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(chain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); +} + +unsigned int +nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; - const struct nft_pktinfo pkt = { - .skb = skb, - .in = in, - .out = out, - .hooknum = ops->hooknum, - }; unsigned int stackptr = 0; - struct { - const struct nft_chain *chain; - const struct nft_rule *rule; - } jumpstack[NFT_JUMP_STACK_SIZE]; + struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); @@ -91,8 +98,8 @@ next_rule: if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); else if (expr->ops != &nft_payload_fast_ops || - !nft_payload_fast_eval(expr, data, &pkt)) - expr->ops->eval(expr, data, &pkt); + !nft_payload_fast_eval(expr, data, pkt)) + expr->ops->eval(expr, data, pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; @@ -135,10 +142,11 @@ next_rule: rule = jumpstack[stackptr].rule; goto next_rule; } + nft_chain_stats(chain, pkt, jumpstack, stackptr); - return NF_ACCEPT; + return nft_base_chain(chain)->policy; } -EXPORT_SYMBOL_GPL(nft_do_chain); +EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); int __init nf_tables_core_module_init(void) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 37134f3e84fb..954925db414d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -162,7 +162,8 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .dump = nft_cmp_fast_dump, }; -static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; 
struct nft_data data; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c new file mode 100644 index 000000000000..4811f762e060 --- /dev/null +++ b/net/netfilter/nft_compat.c @@ -0,0 +1,768 @@ +/* + * (C) 2012-2013 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This software has been sponsored by Sophos Astaro + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for set_fs */ +#include + +union nft_entry { + struct ipt_entry e4; + struct ip6t_entry e6; +}; + +static inline void +nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) +{ + par->target = xt; + par->targinfo = xt_info; + par->hotdrop = false; +} + +static void nft_target_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch(ret) { + case XT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } + return; +} + +static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { + [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, +}; + +static void +nft_target_set_tgchk_param(struct xt_tgchk_param *par, + const struct nft_ctx *ctx, + struct xt_target *target, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { 
+ case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->target = target; + par->targinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void target_compat_from_user(struct xt_target *t, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (t->compat_from_user) { + int pad; + + t->compat_from_user(out, in); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(t->targetsize)); +} + +static inline int nft_compat_target_offset(struct xt_target *target) +{ +#ifdef CONFIG_COMPAT + return xt_compat_target_offset(target); +#else + return 0; +#endif +} + +static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { + [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, +}; + +static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +{ + struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 flags; + int err; + + err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy); + if (err < 0) + return err; + + if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS]) + return -EINVAL; + + flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); + if (flags & ~NFT_RULE_COMPAT_F_MASK) + return -EINVAL; + if (flags & NFT_RULE_COMPAT_F_INV) + *inv = true; + + return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); +} + +static int +nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct 
xt_target *target = expr->ops->data; + struct xt_tgchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + + ret = xt_check_target(&par, size, proto, inv); + if (ret < 0) + goto err; + + /* The standard target cannot be used */ + if (target->target == NULL) { + ret = -EINVAL; + goto err; + } + + return 0; +err: + module_put(target->me); + return ret; +} + +static void +nft_target_destroy(const struct nft_expr *expr) +{ + struct xt_target *target = expr->ops->data; + + module_put(target->me); +} + +static int +target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (t->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + t->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); + + return ret; +} + +static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || + nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || + target_dump_info(skb, target, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_target_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + 
struct xt_target *target = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & target->hooks) + return 0; + + /* This target is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct sk_buff *skb = pkt->skb; + bool ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info); + + ret = match->match(skb, (struct xt_action_param *)&pkt->xt); + + if (pkt->xt.hotdrop) { + data[NFT_REG_VERDICT].verdict = NF_DROP; + return; + } + + switch(ret) { + case true: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case false: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; + break; + } +} + +static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { + [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, +}; + +/* struct xt_mtchk_param and xt_tgchk_param look very similar */ +static void +nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, + struct xt_match *match, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? 
IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->match = match; + par->matchinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void match_compat_from_user(struct xt_match *m, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (m->compat_from_user) { + int pad; + + m->compat_from_user(out, in); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(m->matchsize)); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct xt_mtchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + + ret = xt_check_match(&par, size, proto, inv); + if (ret < 0) + goto err; + + return 0; +err: + module_put(match->me); + return ret; +} + +static void +nft_match_destroy(const struct nft_expr *expr) +{ + struct xt_match *match = expr->ops->data; + + module_put(match->me); +} + +static int +match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (m->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + m->compat_to_user(out, in); + 
set_fs(old_fs); + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); + + return ret; +} + +static inline int nft_compat_match_offset(struct xt_match *match) +{ +#ifdef CONFIG_COMPAT + return xt_compat_match_offset(match); +#else + return 0; +#endif +} + +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + + if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || + nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || + match_dump_info(skb, match, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_match_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_match *match = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & match->hooks) + return 0; + + /* This match is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static int +nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, + int event, u16 family, const char *name, + int rev, int target) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? 
NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_NFT_COMPAT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || + nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || + nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0, target; + struct nfgenmsg *nfmsg; + const char *fmt; + const char *name; + u32 rev; + struct sk_buff *skb2; + + if (tb[NFTA_COMPAT_NAME] == NULL || + tb[NFTA_COMPAT_REV] == NULL || + tb[NFTA_COMPAT_TYPE] == NULL) + return -EINVAL; + + name = nla_data(tb[NFTA_COMPAT_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV])); + target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE])); + + nfmsg = nlmsg_data(nlh); + + switch(nfmsg->nfgen_family) { + case AF_INET: + fmt = "ipt_%s"; + break; + case AF_INET6: + fmt = "ip6t_%s"; + break; + default: + pr_err("nft_compat: unsupported protocol %d\n", + nfmsg->nfgen_family); + return -EINVAL; + } + + try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, + rev, target, &ret), + fmt, name); + + if (ret < 0) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + /* include the best revision for this extension in the message */ + if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_COMPAT_GET, + nfmsg->nfgen_family, + name, ret, target) <= 0) { + kfree_skb(skb2); + return -ENOSPC; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) 
+ ret = 0; + + return ret == -EAGAIN ? -ENOBUFS : ret; +} + +static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { + [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, + .len = NFT_COMPAT_NAME_MAX-1 }, + [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, +}; + +static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { + [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, + .attr_count = NFTA_COMPAT_MAX, + .policy = nfnl_compat_policy_get }, +}; + +static const struct nfnetlink_subsystem nfnl_compat_subsys = { + .name = "nft-compat", + .subsys_id = NFNL_SUBSYS_NFT_COMPAT, + .cb_count = NFNL_MSG_COMPAT_MAX, + .cb = nfnl_nft_compat_cb, +}; + +static LIST_HEAD(nft_match_list); + +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; +}; + +static struct nft_expr_type nft_match_type; + +static const struct nft_expr_ops * +nft_match_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_match; + struct xt_match *match; + char *mt_name; + __u32 rev, family; + + if (tb[NFTA_MATCH_NAME] == NULL || + tb[NFTA_MATCH_REV] == NULL || + tb[NFTA_MATCH_INFO] == NULL) + return ERR_PTR(-EINVAL); + + mt_name = nla_data(tb[NFTA_MATCH_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); + family = ctx->afi->family; + + /* Re-use the existing match if it's already loaded. 
*/ + list_for_each_entry(nft_match, &nft_match_list, head) { + struct xt_match *match = nft_match->ops.data; + + if (strcmp(match->name, mt_name) == 0 && + match->revision == rev && match->family == family) + return &nft_match->ops; + } + + match = xt_request_find_match(family, mt_name, rev); + if (IS_ERR(match)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this match, allocate operations */ + nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_match == NULL) + return ERR_PTR(-ENOMEM); + + nft_match->ops.type = &nft_match_type; + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + + nft_compat_match_offset(match)); + nft_match->ops.eval = nft_match_eval; + nft_match->ops.init = nft_match_init; + nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.dump = nft_match_dump; + nft_match->ops.validate = nft_match_validate; + nft_match->ops.data = match; + + list_add(&nft_match->head, &nft_match_list); + + return &nft_match->ops; +} + +static void nft_match_release(void) +{ + struct nft_xt *nft_match; + + list_for_each_entry(nft_match, &nft_match_list, head) + kfree(nft_match); +} + +static struct nft_expr_type nft_match_type __read_mostly = { + .name = "match", + .select_ops = nft_match_select_ops, + .policy = nft_match_policy, + .maxattr = NFTA_MATCH_MAX, + .owner = THIS_MODULE, +}; + +static LIST_HEAD(nft_target_list); + +static struct nft_expr_type nft_target_type; + +static const struct nft_expr_ops * +nft_target_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_target; + struct xt_target *target; + char *tg_name; + __u32 rev, family; + + if (tb[NFTA_TARGET_NAME] == NULL || + tb[NFTA_TARGET_REV] == NULL || + tb[NFTA_TARGET_INFO] == NULL) + return ERR_PTR(-EINVAL); + + tg_name = nla_data(tb[NFTA_TARGET_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); + family = ctx->afi->family; + + /* Re-use the existing target if it's already loaded. 
*/ + list_for_each_entry(nft_target, &nft_match_list, head) { + struct xt_target *target = nft_target->ops.data; + + if (strcmp(target->name, tg_name) == 0 && + target->revision == rev && target->family == family) + return &nft_target->ops; + } + + target = xt_request_find_target(family, tg_name, rev); + if (IS_ERR(target)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this target, allocate operations */ + nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_target == NULL) + return ERR_PTR(-ENOMEM); + + nft_target->ops.type = &nft_target_type; + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + + nft_compat_target_offset(target)); + nft_target->ops.eval = nft_target_eval; + nft_target->ops.init = nft_target_init; + nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.dump = nft_target_dump; + nft_target->ops.validate = nft_target_validate; + nft_target->ops.data = target; + + list_add(&nft_target->head, &nft_target_list); + + return &nft_target->ops; +} + +static void nft_target_release(void) +{ + struct nft_xt *nft_target; + + list_for_each_entry(nft_target, &nft_target_list, head) + kfree(nft_target); +} + +static struct nft_expr_type nft_target_type __read_mostly = { + .name = "target", + .select_ops = nft_target_select_ops, + .policy = nft_target_policy, + .maxattr = NFTA_TARGET_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_compat_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_match_type); + if (ret < 0) + return ret; + + ret = nft_register_expr(&nft_target_type); + if (ret < 0) + goto err_match; + + ret = nfnetlink_subsys_register(&nfnl_compat_subsys); + if (ret < 0) { + pr_err("nft_compat: cannot register with nfnetlink.\n"); + goto err_target; + } + + pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso \n"); + + return ret; + +err_target: + nft_unregister_expr(&nft_target_type); +err_match: + nft_unregister_expr(&nft_match_type); + return ret; +} + +static void 
__exit nft_compat_module_exit(void) +{ + nfnetlink_subsys_unregister(&nfnl_compat_subsys); + nft_unregister_expr(&nft_target_type); + nft_unregister_expr(&nft_match_type); + nft_match_release(); + nft_target_release(); +} + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); + +module_init(nft_compat_module_init); +module_exit(nft_compat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("match"); +MODULE_ALIAS_NFT_EXPR("target"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 1bfeeaf865b6..f169501f1ad4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,14 +90,16 @@ nla_put_failure: return -1; } -static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +static int nft_immediate_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); if (priv->dreg == NFT_REG_VERDICT) - return &priv->data; - else - return NULL; + *data = &priv->data; + + return 0; } static struct nft_expr_type nft_imm_type; @@ -108,7 +110,7 @@ static const struct nft_expr_ops nft_imm_ops = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, - .get_verdict = nft_immediate_get_verdict, + .validate = nft_immediate_validate, }; static struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7cf13f7e1e94..bc8bdb2c1ba7 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,7 +107,9 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; -static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_payload_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) { enum nft_payload_bases base; unsigned int 
offset, len; From c54032e05bfcbb261f47aaadf8476e864e8712f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 11 Oct 2013 10:00:22 +0200 Subject: [PATCH 10/17] netfilter: nf_tables: nft_payload: fix transport header base We cannot use skb->transport_header since it's unset, use pkt->xt.thoff instead. Now possible using information made available through the x_tables compatibility layer. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_payload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e51a45c12128..3c13007d80df 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,7 +44,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else - ptr = skb_transport_header(skb); + ptr = skb_network_header(skb) + pkt->xt.thoff; ptr += priv->offset; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bc8bdb2c1ba7..a2aeb318678f 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -36,7 +36,7 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - offset = skb_transport_offset(skb); + offset = pkt->xt.thoff; break; default: BUG(); From 9ddf63235749a9efa1fad2eeb74be2ee9b580f8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 13:26:33 +0200 Subject: [PATCH 11/17] netfilter: nf_tables: add support for dormant tables This patch allows you to temporarily disable an entire table. You can change the state of a dormant table via NFT_MSG_NEWTABLE messages. Using this operation you can wake up a table, so its chains are registered. This provides atomicity at chain level.
Thus, the rule-set of one chain is applied at once, avoiding any possible intermediate state in every chain. Still, the chains that belong to a table are registered consecutively. This also allows you to have inactive tables in the kernel. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 11 +++ net/netfilter/nf_tables_api.c | 97 ++++++++++++++++++++++-- 2 files changed, 101 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1563875e6942..a9c4bce1988f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -96,14 +96,25 @@ enum nft_hook_attributes { }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, +}; + /** * enum nft_table_attributes - nf_tables table netlink attributes * * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 61e017b349cb..a4dd7ce5ec3e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -158,6 +158,7 @@ static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -177,7 +178,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_TABLE_NAME,
table->name)) + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -301,6 +303,74 @@ err: return err; } +static int nf_tables_table_enable(struct nft_table *table) +{ + struct nft_chain *chain; + int err, i = 0; + + list_for_each_entry(chain, &table->chains, list) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err; + + i++; + } + return 0; +err: + list_for_each_entry(chain, &table->chains, list) { + if (i-- <= 0) + break; + + nf_unregister_hook(&nft_base_chain(chain)->ops); + } + return err; +} + +static int nf_tables_table_disable(struct nft_table *table) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct nft_af_info *afi, struct nft_table *table) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + int family = nfmsg->nfgen_family, ret = 0; + + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + + if ((flags & NFT_TABLE_F_DORMANT) && + !(table->flags & NFT_TABLE_F_DORMANT)) { + ret = nf_tables_table_disable(table); + if (ret >= 0) + table->flags |= NFT_TABLE_F_DORMANT; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(table); + if (ret >= 0) + table->flags &= ~NFT_TABLE_F_DORMANT; + } + if (ret < 0) + goto err; + } + + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); +err: + return ret; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -328,7 +398,7 @@ static int nf_tables_newtable(struct 
sock *nlsk, struct sk_buff *skb, return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return 0; + return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); } table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); @@ -339,6 +409,18 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) { + kfree(table); + return -EINVAL; + } + + table->flags |= flags; + } + list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); return 0; @@ -890,10 +972,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, chain->handle = nf_tables_alloc_handle(table); nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - list_add_tail(&chain->list, &table->chains); - table->use++; - - if (chain->flags & NFT_BASE_CHAIN) { + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) { free_percpu(basechain->stats); @@ -901,6 +981,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return err; } } + list_add_tail(&chain->list, &table->chains); + table->use++; notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -948,7 +1030,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, list_del(&chain->list); table->use--; - if (chain->flags & NFT_BASE_CHAIN) + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) nf_unregister_hook(&nft_base_chain(chain)->ops); nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, From eb31628e37a0a4e01fffd79dcc7f815d2357f53a Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 10 Oct 2013 13:39:19 +0200 Subject: [PATCH 12/17] netfilter: nf_tables: Add 
support for IPv6 NAT This patch generalizes the NAT expression to support both IPv4 and IPv6 using the existing IPv4/IPv6 NAT infrastructure. This also adds the NAT chain type for IPv6. This patch collapses the following patches that were posted to the netfilter-devel mailing list, from Tomasz: * nf_tables: Change NFTA_NAT_ attributes to better semantic significance * nf_tables: Split IPv4 NAT into NAT expression and IPv4 NAT chain * nf_tables: Add support for IPv6 NAT expression * nf_tables: Add support for IPv6 NAT chain * nf_tables: Fix up build issue on IPv6 NAT support And, from Pablo Neira Ayuso: * fix missing dependencies in nft_chain_nat Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 18 +- net/ipv4/netfilter/Kconfig | 1 + net/ipv4/netfilter/nft_chain_nat_ipv4.c | 156 +--------------- net/ipv6/netfilter/Kconfig | 5 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_chain_nat_ipv6.c | 211 ++++++++++++++++++++++ net/netfilter/Kconfig | 6 + net/netfilter/Makefile | 1 + net/netfilter/nft_nat.c | 220 +++++++++++++++++++++++ 9 files changed, 457 insertions(+), 162 deletions(-) create mode 100644 net/ipv6/netfilter/nft_chain_nat_ipv6.c create mode 100644 net/netfilter/nft_nat.c diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a9c4bce1988f..7d4a1992f89c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -695,18 +695,20 @@ enum nft_nat_types { * enum nft_nat_attributes - nf_tables nat expression netlink attributes * * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) - * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) - * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MAX: source register of proto range end 
(NLA_U32: nft_registers) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) */ enum nft_nat_attributes { NFTA_NAT_UNSPEC, NFTA_NAT_TYPE, - NFTA_NAT_ADDR_MIN, - NFTA_NAT_ADDR_MAX, - NFTA_NAT_PROTO_MIN, - NFTA_NAT_PROTO_MAX, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, __NFTA_NAT_MAX }; #define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ae65fe98bfbe..1f37ef67f1ac 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -50,6 +50,7 @@ config NFT_CHAIN_ROUTE_IPV4 config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 + depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" config IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index e09c201adf84..cf2c792cd971 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2008-2009 Patrick McHardy * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,10 +15,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -27,147 +26,6 @@ #include #include -struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; - enum nf_nat_manip_type type; -}; - 
-static void nft_nat_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); - struct nf_nat_range range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_addr_min) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; - range.flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - data[NFT_REG_VERDICT].verdict = - nf_nat_setup_info(ct, &range, priv->type); -} - -static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { - [NFTA_NAT_ADDR_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_ADDR_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MIN] = { .type = NLA_U32 }, - [NFTA_NAT_PROTO_MAX] = { .type = NLA_U32 }, - [NFTA_NAT_TYPE] = { .type = NLA_U32 }, -}; - -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_nat *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_NAT_TYPE] == NULL) - return -EINVAL; - - switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { - case NFT_NAT_SNAT: - priv->type = NF_NAT_MANIP_SRC; - break; - case NFT_NAT_DNAT: - priv->type = NF_NAT_MANIP_DST; - break; - default: - return -EINVAL; - } - - if (tb[NFTA_NAT_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MIN])); - err = nft_validate_input_register(priv->sreg_addr_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32(tb[NFTA_NAT_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; - - if 
(tb[NFTA_NAT_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MIN])); - err = nft_validate_input_register(priv->sreg_proto_min); - if (err < 0) - return err; - } - - if (tb[NFTA_NAT_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32(tb[NFTA_NAT_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; - - return 0; -} - -static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - const struct nft_nat *priv = nft_expr_priv(expr); - - switch (priv->type) { - case NF_NAT_MANIP_SRC: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) - goto nla_put_failure; - break; - case NF_NAT_MANIP_DST: - if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) - goto nla_put_failure; - break; - } - - if (nla_put_be32(skb, NFTA_NAT_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MIN, htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_PROTO_MAX, htonl(priv->sreg_proto_max))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_type nft_nat_type; -static const struct nft_expr_ops nft_nat_ops = { - .type = &nft_nat_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), - .eval = nft_nat_eval, - .init = nft_nat_init, - .dump = nft_nat_dump, -}; - -static struct nft_expr_type nft_nat_type __read_mostly = { - .name = "nat", - .ops = &nft_nat_ops, - .policy = nft_nat_policy, - .maxattr = NFTA_NAT_MAX, - .owner = THIS_MODULE, -}; - /* * NAT chains */ @@ -306,7 +164,7 @@ static unsigned int nf_nat_output(const struct nf_hook_ops *ops, return ret; } -struct nf_chain_type nft_chain_nat_ipv4 = { +static struct nf_chain_type nft_chain_nat_ipv4 = { .family = NFPROTO_IPV4, .name = "nat", .type = 
NFT_CHAIN_T_NAT, @@ -331,20 +189,11 @@ static int __init nft_chain_nat_init(void) if (err < 0) return err; - err = nft_register_expr(&nft_nat_type); - if (err < 0) - goto err; - return 0; - -err: - nft_unregister_chain_type(&nft_chain_nat_ipv4); - return err; } static void __exit nft_chain_nat_exit(void) { - nft_unregister_expr(&nft_nat_type); nft_unregister_chain_type(&nft_chain_nat_ipv4); } @@ -354,4 +203,3 @@ module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); -MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 23833064b7b5..7702f9e90a04 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -33,6 +33,11 @@ config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" +config NFT_CHAIN_NAT_IPV6 + depends on NF_TABLES_IPV6 + depends on NF_NAT_IPV6 && NFT_NAT + tristate "IPv6 nf_tables nat chain support" + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index be4913aa524d..d1b4928f34f7 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c new file mode 100644 index 000000000000..e86dcd70dc76 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the 
terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * IPv6 NAT chains + */ + +static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (nat == NULL) { + /* Conntrack module was loaded late, can't add extension. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) + return NF_ACCEPT; + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + ops->hooknum, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + + ret = nft_do_chain_pktinfo(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, + struct 
sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if 
(nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_chain_type nft_chain_nat_ipv6 = { + .family = NFPROTO_IPV6, + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .fn = { + [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, + [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, + }, + .me = THIS_MODULE, +}; + +static int __init nft_chain_nat_ipv6_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv6); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_ipv6_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv6); +} + +module_init(nft_chain_nat_ipv6_init); +module_exit(nft_chain_nat_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e362707379..48acec17e27a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,12 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_NAT + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables nat module" + config NFT_COMPAT depends on NF_TABLES depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a6781450b6fb..394483b2c193 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +obj-$(CONFIG_NFT_NAT) += nft_nat.o #nf_tables-objs += nft_meta_target.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o diff --git a/net/netfilter/nft_nat.c 
b/net/netfilter/nft_nat.c new file mode 100644 index 000000000000..b0b87b2d2411 --- /dev/null +++ b/net/netfilter/nft_nat.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + int family; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + if (priv->family == AF_INET) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + + } else { + memcpy(range.min_addr.ip6, + data[priv->sreg_addr_min].data, + sizeof(struct nft_data)); + memcpy(range.max_addr.ip6, + data[priv->sreg_addr_max].data, + sizeof(struct nft_data)); + } + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + 
[NFTA_NAT_TYPE] = { .type = NLA_U32 }, + [NFTA_NAT_FAMILY] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_FAMILY] == NULL) + return -EINVAL; + + priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); + if (priv->family != AF_INET && priv->family != AF_INET6) + return -EINVAL; + + if (tb[NFTA_NAT_REG_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_REG_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + 
switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_nat_module_init(void) +{ + int err; + + err = nft_register_expr(&nft_nat_type); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_nat_module_exit(void) +{ + nft_unregister_expr(&nft_nat_type); +} + +module_init(nft_nat_module_init); +module_exit(nft_nat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_EXPR("nat"); From 99633ab29b2131b68089a6c7f60458390860e044 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:28:33 +0200 Subject: [PATCH 13/17] netfilter: nf_tables: complete net namespace support Register family per netnamespace to ensure that sets are only visible in their appropriate namespace.
Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 4 ++ include/net/netfilter/nf_tables.h | 4 +- include/net/netns/nftables.h | 15 +++++ net/bridge/netfilter/nf_tables_bridge.c | 32 +++++++++- net/ipv4/netfilter/nf_tables_ipv4.c | 32 +++++++++- net/ipv6/netfilter/nf_tables_ipv6.c | 33 +++++++++- net/netfilter/nf_tables_api.c | 83 ++++++++++++++++--------- 7 files changed, 168 insertions(+), 35 deletions(-) create mode 100644 include/net/netns/nftables.h diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bcc4a8ed4450..da68c9a90ac5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -22,6 +22,7 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#include #include struct user_namespace; @@ -101,6 +102,9 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif +#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) + struct netns_nftables nft; +#endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; #endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a68f45f0fe2e..d3272e943aac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -68,6 +68,7 @@ static inline void nft_data_debug(const struct nft_data *data) /** * struct nft_ctx - nf_tables rule/set context * + * @net: net namespace * @skb: netlink skb * @nlh: netlink message header * @afi: address family info @@ -76,6 +77,7 @@ static inline void nft_data_debug(const struct nft_data *data) * @nla: netlink attributes */ struct nft_ctx { + struct net *net; const struct sk_buff *skb; const struct nlmsghdr *nlh; const struct nft_af_info *afi; @@ -462,7 +464,7 @@ struct nft_af_info { nf_hookfn *hooks[NF_MAX_HOOKS]; }; -extern int nft_register_afinfo(struct nft_af_info *); +extern int nft_register_afinfo(struct net *, struct nft_af_info *); extern void 
nft_unregister_afinfo(struct nft_af_info *); struct nf_chain_type { diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h new file mode 100644 index 000000000000..a98b1c5d9913 --- /dev/null +++ b/include/net/netns/nftables.h @@ -0,0 +1,15 @@ +#ifndef _NETNS_NFTABLES_H_ +#define _NETNS_NFTABLES_H_ + +#include + +struct nft_af_info; + +struct netns_nftables { + struct list_head af_info; + struct nft_af_info *ipv4; + struct nft_af_info *ipv6; + struct nft_af_info *bridge; +}; + +#endif diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index bc5c21c911c0..e8cb016fa34d 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -19,14 +19,42 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .owner = THIS_MODULE, }; +static int nf_tables_bridge_init_net(struct net *net) +{ + net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.bridge == NULL) + return -ENOMEM; + + memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); + + if (nft_register_afinfo(net, net->nft.bridge) < 0) + goto err; + + return 0; +err: + kfree(net->nft.bridge); + return -ENOMEM; +} + +static void nf_tables_bridge_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.bridge); + kfree(net->nft.bridge); +} + +static struct pernet_operations nf_tables_bridge_net_ops = { + .init = nf_tables_bridge_init_net, + .exit = nf_tables_bridge_exit_net, +}; + static int __init nf_tables_bridge_init(void) { - return nft_register_afinfo(&nft_af_bridge); + return register_pernet_subsys(&nf_tables_bridge_net_ops); } static void __exit nf_tables_bridge_exit(void) { - nft_unregister_afinfo(&nft_af_bridge); + return unregister_pernet_subsys(&nf_tables_bridge_net_ops); } module_init(nf_tables_bridge_init); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index c61cffb9b760..8f7536be1322 100644 --- 
a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,33 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { }, }; +static int nf_tables_ipv4_init_net(struct net *net) +{ + net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv4 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); + + if (nft_register_afinfo(net, net->nft.ipv4) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv4); + return -ENOMEM; +} + +static void nf_tables_ipv4_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv4); + kfree(net->nft.ipv4); +} + +static struct pernet_operations nf_tables_ipv4_net_ops = { + .init = nf_tables_ipv4_init_net, + .exit = nf_tables_ipv4_exit_net, +}; static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, @@ -83,12 +111,12 @@ static struct nf_chain_type filter_ipv4 = { static int __init nf_tables_ipv4_init(void) { nft_register_chain_type(&filter_ipv4); - return nft_register_afinfo(&nft_af_ipv4); + return register_pernet_subsys(&nf_tables_ipv4_net_ops); } static void __exit nf_tables_ipv4_exit(void) { - nft_unregister_afinfo(&nft_af_ipv4); + unregister_pernet_subsys(&nf_tables_ipv4_net_ops); nft_unregister_chain_type(&filter_ipv4); } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 42f905a808a3..d77db8a13505 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -45,6 +45,34 @@ static struct nft_af_info nft_af_ipv6 __read_mostly = { }, }; +static int nf_tables_ipv6_init_net(struct net *net) +{ + net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv6 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); + + if (nft_register_afinfo(net, net->nft.ipv6) < 0) + goto err; + + return 0; +err: + 
kfree(net->nft.ipv6); + return -ENOMEM; +} + +static void nf_tables_ipv6_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv6); + kfree(net->nft.ipv6); +} + +static struct pernet_operations nf_tables_ipv6_net_ops = { + .init = nf_tables_ipv6_init_net, + .exit = nf_tables_ipv6_exit_net, +}; + static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, @@ -82,11 +110,12 @@ static struct nf_chain_type filter_ipv6 = { static int __init nf_tables_ipv6_init(void) { nft_register_chain_type(&filter_ipv6); - return nft_register_afinfo(&nft_af_ipv6); + return register_pernet_subsys(&nf_tables_ipv6_net_ops); } + static void __exit nf_tables_ipv6_exit(void) { - nft_unregister_afinfo(&nft_af_ipv6); + unregister_pernet_subsys(&nf_tables_ipv6_net_ops); nft_unregister_chain_type(&filter_ipv6); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a4dd7ce5ec3e..e1ee85047ec1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -18,9 +18,9 @@ #include #include #include +#include #include -static LIST_HEAD(nf_tables_afinfo); static LIST_HEAD(nf_tables_expressions); /** @@ -31,11 +31,11 @@ static LIST_HEAD(nf_tables_expressions); * Register the address family for use with nf_tables. Returns zero on * success or a negative errno code otherwise. 
*/ -int nft_register_afinfo(struct nft_af_info *afi) +int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &nf_tables_afinfo); + list_add_tail(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -56,22 +56,23 @@ void nft_unregister_afinfo(struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); -static struct nft_af_info *nft_afinfo_lookup(int family) +static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) { struct nft_af_info *afi; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (afi->family == family) return afi; } return NULL; } -static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +static struct nft_af_info * +nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) { struct nft_af_info *afi; - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return afi; #ifdef CONFIG_MODULES @@ -79,7 +80,7 @@ static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-afinfo-%u", family); nfnl_lock(NFNL_SUBSYS_NFTABLES); - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return ERR_PTR(-EAGAIN); } @@ -232,9 +233,10 @@ static int nf_tables_dump_tables(struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -268,6 +270,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; 
struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -278,7 +281,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -379,9 +382,10 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -433,9 +437,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -663,9 +668,10 @@ static int nf_tables_dump_chains(struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -702,6 +708,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -712,7 +719,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = 
nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -813,6 +820,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u64 handle = 0; int err; @@ -820,7 +828,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1010,9 +1018,10 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1050,6 +1059,7 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nft_chain *chain, const struct nlattr * const *nla) { + ctx->net = sock_net(skb->sk); ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; @@ -1361,9 +1371,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -1402,6 +1413,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1412,7 
+1424,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1477,6 +1489,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -1490,7 +1503,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1585,12 +1598,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1697,11 +1711,12 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1818,12 +1833,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, { struct sk_buff *skb; u32 portid = NETLINK_CB(ctx->skb).portid; - struct net *net = 
sock_net(ctx->skb->sk); bool report; int err; report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1837,11 +1851,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, GFP_KERNEL); err: if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); return err; } @@ -1974,6 +1988,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -2032,7 +2047,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -2219,8 +2234,9 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; + struct net *net = sock_net(skb->sk); - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -3011,6 +3027,16 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); +static int nf_tables_init_net(struct net *net) +{ + INIT_LIST_HEAD(&net->nft.af_info); + return 0; +} + +static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, +}; + static int __init nf_tables_module_init(void) { int err; @@ -3031,7 +3057,7 @@ static int __init nf_tables_module_init(void) goto err3; pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); - return 0; + return register_pernet_subsys(&nf_tables_net_ops); err3: nf_tables_core_module_exit(); err2: @@ -3042,6 +3068,7 @@ err1: static void __exit nf_tables_module_exit(void) { + unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); nf_tables_core_module_exit(); kfree(info); From 5e94846686d027a4c8ecc5d9d52b18036d3e8f7a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 10 Oct 2013 13:41:44 +0200 Subject: [PATCH 14/17] netfilter: nf_tables: add insert operation This patch adds a new rule attribute NFTA_RULE_POSITION which is used to store the position of a rule relatively to the others. By providing the create command and specifying the position, the rule is inserted after the rule with the handle equal to the provided position. 
Regarding notification, the position attribute specifies the handle of the previous rule to make sure we don't point to any stale rule in notifications coming from the commit path. This patch includes the following fix from Pablo: * nf_tables: fix rule deletion event reporting Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 38 ++++++++++++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7d4a1992f89c..fbfd229a8e99 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -153,6 +153,7 @@ enum nft_chain_attributes { * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -161,6 +162,7 @@ enum nft_rule_attributes { NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e1ee85047ec1..0f140663ec71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1273,6 +1273,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, + [NFTA_RULE_POSITION] = { .type = NLA_U64 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1285,9 +1286,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; 
const struct nft_expr *expr, *next; struct nlattr *list; + const struct nft_rule *prule; + int type = event | NFNL_SUBSYS_NFTABLES << 8; - event |= NFNL_SUBSYS_NFTABLES << 8; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1304,6 +1306,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) goto nla_put_failure; + if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { + prule = list_entry(rule->list.prev, struct nft_rule, list); + if (nla_put_be64(skb, NFTA_RULE_POSITION, + cpu_to_be64(prule->handle))) + goto nla_put_failure; + } + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; @@ -1499,7 +1508,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u64 handle; + u64 handle, pos_handle; create = nlh->nlmsg_flags & NLM_F_CREATE ? 
true : false; @@ -1533,6 +1542,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + if (nla[NFTA_RULE_POSITION]) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EOPNOTSUPP; + + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nf_tables_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) + return PTR_ERR(old_rule); + } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); n = 0; @@ -1573,9 +1592,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); } else if (nlh->nlmsg_flags & NLM_F_APPEND) - list_add_tail_rcu(&rule->list, &chain->rules); - else - list_add_rcu(&rule->list, &chain->rules); + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 11:05:33 +0200 Subject: [PATCH 15/17] netfilter: nfnetlink: add batch support and use it from nf_tables This patch adds batch support to nfnetlink. Basically, it adds two new control messages: * NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID. * NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error occurred in the batch, the ss->abort function is invoked instead. The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receive path. 
This patch adds atomic rule updates and dumps based on bitmask generations. This allows atomically committing a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets. The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as: 00 active in the present, will be active in the next generation. 01 inactive in the present, will be active in the next generation. 10 active in the present, will be deleted in the next generation. ^ gencursor Once you invoke the transition to the next generation, the global gencursor is updated: 00 active in the present, will be active in the next generation. 01 active in the present, needs to zero its future, it becomes 00. 10 inactive in the present, delete now. ^ gencursor If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry. In order to invalidate dumps, a global genctr counter is increased every time nf_tables enters a new generation. This new operation can be used from the user-space utility that controls the firewall, e.g. nft -f restore The rule updates contained in `file' will be applied atomically. cat file ----- add filter INPUT ip saddr 1.1.1.1 counter accept #1 del filter INPUT ip daddr 2.2.2.2 counter drop #2 -EOF- Note that rule 1 will be inactive until the transition to the next generation, and rule 2 will be evicted in the next generation. There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contains rules that require updates is finished. Event notification happens once the rule-set update has been committed. 
So we skip notifications in case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly. This patch squashed the following patches from Pablo: * nf_tables: atomic rule updates and dumps * nf_tables: get rid of per rule list_head for commits * nf_tables: use per netns commit list * nfnetlink: add batch support and use it from nf_tables * nf_tables: all rule updates are transactional * nf_tables: attach replacement rule after stale one * nf_tables: do not allow deletion/replacement of stale rules * nf_tables: remove unused NFTA_RULE_FLAGS Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 5 + include/net/netfilter/nf_tables.h | 25 ++- include/net/netns/nftables.h | 3 + include/uapi/linux/netfilter/nfnetlink.h | 4 + net/netfilter/nf_tables_api.c | 204 ++++++++++++++++++++--- net/netfilter/nf_tables_core.c | 10 ++ net/netfilter/nfnetlink.c | 175 ++++++++++++++++++- 7 files changed, 402 insertions(+), 24 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 4f68cd7141d2..28c74367e900 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -14,6 +14,9 @@ struct nfnl_callback { int (*call_rcu)(struct sock *nl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]); + int (*call_batch)(struct sock *nl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; @@ -23,6 +26,8 @@ struct nfnetlink_subsystem { __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ const struct nfnl_callback *cb; /* callback for individual types */ + int (*commit)(struct sk_buff *skb); + int (*abort)(struct sk_buff *skb); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); diff --git 
a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d3272e943aac..975ad3c573c7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * @list: used internally * @rcu_head: used internally for rcu * @handle: rule handle + * @genmask: generation mask * @dlen: length of expression data * @data: expression data */ struct nft_rule { struct list_head list; struct rcu_head rcu_head; - u64 handle:48, + u64 handle:46, + genmask:2, dlen:16; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; +/** + * struct nft_rule_trans - nf_tables rule update in transaction + * + * @list: used internally + * @rule: rule that needs to be updated + * @chain: chain that this rule belongs to + * @table: table for which this chain applies + * @nlh: netlink header of the message that contain this update + * @family: family expressesed as AF_* + */ +struct nft_rule_trans { + struct list_head list; + struct nft_rule *rule; + const struct nft_chain *chain; + const struct nft_table *table; + const struct nlmsghdr *nlh; + u8 family; +}; + static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) { return (struct nft_expr *)&rule->data[0]; @@ -370,6 +391,7 @@ enum nft_chain_flags { * @rules: list of rules in the chain * @list: used internally * @rcu_head: used internally + * @net: net namespace that this chain belongs to * @handle: chain handle * @flags: bitmask of enum nft_chain_flags * @use: number of jump references to this chain @@ -380,6 +402,7 @@ struct nft_chain { struct list_head rules; struct list_head list; struct rcu_head rcu_head; + struct net *net; u64 handle; u8 flags; u16 use; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a98b1c5d9913..08a4248a12b5 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -7,9 +7,12 @@ struct nft_af_info; struct 
netns_nftables { struct list_head af_info; + struct list_head commit_list; struct nft_af_info *ipv4; struct nft_af_info *ipv6; struct nft_af_info *bridge; + u8 gencursor; + u8 genctr; }; #endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 288959404d54..596ddd45253c 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -57,4 +57,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_NFT_COMPAT 11 #define NFNL_SUBSYS_COUNT 12 +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + #endif /* _UAPI_NFNETLINK_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f140663ec71..79e1418a6043 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); + chain->net = net; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1371,6 +1372,41 @@ err: return err; } +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 
1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { @@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 genctr = ACCESS_ONCE(net->nft.genctr); + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) @@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry(table, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) { list_for_each_entry(rule, &chain->rules, list) { + if (!nft_rule_is_active(net, rule)) + goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1408,6 +1448,10 @@ cont: } } done: + /* Invalidate this dump, a transition to the new generation happened */ + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) + return -EBUSY; + cb->args[0] = idx; return skb->len; } @@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; +static struct nft_rule_trans * +nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_trans *rupd; + + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); + if (rupd == NULL) + return NULL; + + 
rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->family = ctx->afi->family; + rupd->nlh = ctx->nlh; + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + + return rupd; +} + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_rule_trans *repl = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; + nft_rule_activate_next(net, rule); + rule->handle = handle; rule->dlen = size; @@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - list_replace_rcu(&old_rule->list, &rule->list); - nf_tables_rule_destroy(old_rule); + if (nft_rule_is_active_next(net, old_rule)) { + repl = nf_tables_trans_add(old_rule, &ctx); + if (repl == NULL) { + err = -ENOMEM; + goto err2; + } + nft_rule_disactivate_next(net, old_rule); + list_add_tail(&rule->list, &old_rule->list); + } else { + err = -ENOENT; + goto err2; + } } else if (nlh->nlmsg_flags & NLM_F_APPEND) if (old_rule) list_add_rcu(&rule->list, &old_rule->list); @@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), - nfmsg->nfgen_family); + if (nf_tables_trans_add(rule, &ctx) == NULL) { + err = -ENOMEM; + goto err3; + } return 0; +err3: + list_del_rcu(&rule->list); + if (repl) { + list_del_rcu(&repl->rule->list); + list_del(&repl->list); + nft_rule_clear(net, repl->rule); + kfree(repl); + } err2: 
nf_tables_rule_destroy(rule); err1: @@ -1618,6 +1703,19 @@ err1: return err; } +static int +nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + if (nf_tables_trans_add(rule, ctx) == NULL) + return -ENOMEM; + nft_rule_disactivate_next(ctx->net, rule); + return 0; + } + return -ENOENT; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1642,28 +1741,92 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - /* List removal must be visible before destroying expressions */ - list_del_rcu(&rule->list); - - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + err = nf_tables_delrule_one(&ctx, rule); } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) { - list_del_rcu(&rule->list); - - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + err = nf_tables_delrule_one(&ctx, rule); + if (err < 0) + break; } } + return err; +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + /* Bump generation counter, invalidate any dump in progress 
*/ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); + + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + rupd->family); + kfree(rupd); + continue; + } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, + rupd->family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + + return 0; +} + +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + kfree(rupd); + continue; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } return 0; } @@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_NEWRULE] = { - .call = nf_tables_newrule, + .call_batch = nf_tables_newrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_DELRULE] = { - .call = 
nf_tables_delrule, + .call_batch = nf_tables_delrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .subsys_id = NFNL_SUBSYS_NFTABLES, .cb_count = NFT_MSG_MAX, .cb = nf_tables_cb, + .commit = nf_tables_commit, + .abort = nf_tables_abort, }; /* @@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.commit_list); return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3c13007d80df..d581ef660248 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + /* + * Cache cursor to avoid problems in case that the cursor is updated + * while traversing the ruleset. + */ + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + + /* This rule is not active, skip. 
*/ + if (unlikely(rule->genmask & (1 << gencursor))) + continue; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87dc116f..027f16af51a0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; @@ -217,9 +214,179 @@ replay: } } +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) +{ + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; + int err; + /* Process one batch for subsys_id on a clone of the skb: commit on an error-free BATCH_END, abort otherwise. */ + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + netlink_ack(skb, nlh, -EOPNOTSUPP); /* skb is nskb here: ack before freeing it */ + return kfree_skb(nskb); + } + } + + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + netlink_ack(skb, nlh, -EOPNOTSUPP); /* skb is nskb here: ack before freeing it */ + return kfree_skb(nskb); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+ err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers.
+ */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); +} + static void nfnetlink_rcv(struct sk_buff *skb) { - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct net *net = sock_net(skb->sk); + int msglen; + /* Validate the header first so that nothing below, netlink_ack() included, acts on an unchecked nlmsg_len. */ + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return netlink_ack(skb, nlh, -EPERM); + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } } #ifdef CONFIG_MODULES From b5bc89bfa0b46de37754610f46c0ef4e2280edb4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 16:49:19 +0200 Subject: [PATCH 16/17] netfilter: nf_tables: add trace support This patch adds support for tracing the packet travel through the ruleset, in a similar fashion to x_tables.
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 1 + net/netfilter/nf_tables_core.c | 57 +++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 975ad3c573c7..54c4a5cafb64 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -392,6 +392,7 @@ enum nft_chain_flags { * @list: used internally * @rcu_head: used internally * @net: net namespace that this chain belongs to + * @table: table that this chain belongs to * @handle: chain handle * @flags: bitmask of enum nft_chain_flags * @use: number of jump references to this chain @@ -403,6 +404,7 @@ struct nft_chain { struct list_head list; struct rcu_head rcu_head; struct net *net; + struct nft_table *table; u64 handle; u8 flags; u16 use; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 79e1418a6043..dcddc49c0e08 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -979,6 +979,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->net = net; + chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index d581ef660248..cb9e685caae1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) @@ -63,6 +64,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; + int rulenum; }; static inline void @@ -79,6 +81,40 @@ nft_chain_stats(const struct nft_chain *this, const 
struct nft_pktinfo *pkt, rcu_read_unlock_bh(); } +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; +/* Label written into the TRACE log prefix for each enum nft_trace value. */ +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; +/* Log parameters that nft_trace_packet() hands to nf_log_packet(). */ +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; +/* Log pkt through nf_log_packet() with a TRACE: table:chain:label:rulenum prefix; the netns is taken from pkt->in, or pkt->out when pkt->in is NULL. */ +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { @@ -88,6 +124,7 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + int rulenum = 0; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset.
@@ -104,6 +141,8 @@ next_rule: if (unlikely(rule->genmask & (1 << gencursor))) continue; + rulenum++; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); @@ -129,17 +168,28 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + return data[NFT_REG_VERDICT].verdict; case NFT_JUMP: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; + jumpstack[stackptr].rulenum = rulenum; stackptr++; /* fall through */ case NFT_GOTO: chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + + /* fall through */ case NFT_CONTINUE: break; default: @@ -147,13 +197,20 @@ next_rule: } if (stackptr > 0) { + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; + rulenum = jumpstack[stackptr].rulenum; goto next_rule; } nft_chain_stats(chain, pkt, jumpstack, stackptr); + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + return nft_base_chain(chain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); From ed683f138b3dbc8a5e878e24a0bfa0bb61043a09 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 7 Oct 2013 22:53:08 +0200 Subject: [PATCH 17/17] netfilter: nf_tables: add ARP filtering support This patch registers the ARP family and the filter chain type for this family.
Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/ipv4/netfilter/Kconfig | 4 ++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/nf_tables_arp.c | 102 +++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 net/ipv4/netfilter/nf_tables_arp.c diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 08a4248a12b5..15d056d534e3 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -10,6 +10,7 @@ struct netns_nftables { struct list_head commit_list; struct nft_af_info *ipv4; struct nft_af_info *ipv6; + struct nft_af_info *arp; struct nft_af_info *bridge; u8 gencursor; u8 genctr; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1f37ef67f1ac..40d56073cd19 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -53,6 +53,10 @@ config NFT_CHAIN_NAT_IPV4 depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" +config NF_TABLES_ARP + depends on NF_TABLES + tristate "ARP nf_tables support" + config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 91e0bd71a6d3..19df72b7ba88 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c new file mode 100644 index 000000000000..3e67ef1c676f --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2008-2010 Patrick McHardy + * Copyright (c) 2013 
Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +/* Template afinfo for NFPROTO_ARP; nf_tables_arp_init_net() copies it into a per-netns allocation. */ +static struct nft_af_info nft_af_arp __read_mostly = { + .family = NFPROTO_ARP, + .nhooks = NF_ARP_NUMHOOKS, + .owner = THIS_MODULE, +}; +/* Per-netns init: allocate net->nft.arp, copy nft_af_arp into it and register it. NOTE(review): a nft_register_afinfo() failure is reported as -ENOMEM rather than its own error code, and net->nft.arp is left pointing at freed memory. */ +static int nf_tables_arp_init_net(struct net *net) +{ + net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.arp== NULL) + return -ENOMEM; + + memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); + + if (nft_register_afinfo(net, net->nft.arp) < 0) + goto err; + + return 0; +err: + kfree(net->nft.arp); + return -ENOMEM; +} +/* Per-netns exit: unregister and free the afinfo set up by nf_tables_arp_init_net(). */ +static void nf_tables_arp_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.arp); + kfree(net->nft.arp); +} + +static struct pernet_operations nf_tables_arp_net_ops = { + .init = nf_tables_arp_init_net, + .exit = nf_tables_arp_exit_net, +}; +/* Hook function: fill a nft_pktinfo from the hook arguments and return the result of nft_do_chain_pktinfo(); okfn is unused. */ +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain_pktinfo(&pkt, ops); +} +/* The filter chain type for ARP, using nft_do_chain_arp at NF_ARP_IN, NF_ARP_OUT and NF_ARP_FORWARD. */ +static struct nf_chain_type filter_arp = { + .family = NFPROTO_ARP, + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .hook_mask = (1 << NF_ARP_IN) | + (1 << NF_ARP_OUT) | + (1 << NF_ARP_FORWARD), + .fn = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, +}; +/* Module init: register the chain type, then the pernet ops; the chain type is unregistered again if register_pernet_subsys() fails. */ +static int __init nf_tables_arp_init(void) +{ + int ret; + + nft_register_chain_type(&filter_arp); + ret = register_pernet_subsys(&nf_tables_arp_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_arp); + + return ret; +} + +static void __exit
nf_tables_arp_exit(void) +{ + unregister_pernet_subsys(&nf_tables_arp_net_ops); + nft_unregister_chain_type(&filter_arp); +} + +module_init(nf_tables_arp_init); +module_exit(nf_tables_arp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */