alistair23-linux/net/netfilter/nf_nat_proto.c
Eyal Birger 61177e911d netfilter: nat: fix ICMP header corruption on ICMP errors
Commit 8303b7e8f0 ("netfilter: nat: fix spurious connection timeouts")
made nf_nat_icmp_reply_translation() use icmp_manip_pkt() as the l4
manipulation function for the outer packet on ICMP errors.

However, icmp_manip_pkt() assumes the packet has an 'id' field which
is not correct for all types of ICMP messages.

This is not correct for ICMP error packets, and leads to bogus bytes
being written the ICMP header, which can be wrongfully regarded as
'length' bytes by RFC 4884 compliant receivers.

Fix by assigning the 'id' field only for ICMP messages that have this
semantic.

Reported-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Fixes: 8303b7e8f0 ("netfilter: nat: fix spurious connection timeouts")
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2020-01-16 15:08:25 +01:00

1051 lines
27 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*/
#include <linux/types.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/sctp/checksum.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <linux/ipv6.h>
#include <linux/netfilter_ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/ipv6.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
static void nf_csum_update(struct sk_buff *skb,
unsigned int iphdroff, __sum16 *check,
const struct nf_conntrack_tuple *t,
enum nf_nat_manip_type maniptype);
static void
__udp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, struct udphdr *hdr,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype, bool do_csum)
{
__be16 *portptr, newport;
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (do_csum) {
nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
}
*portptr = newport;
}
static bool udp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct udphdr *hdr;
bool do_csum;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
static bool udplite_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
#endif
return true;
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_SCTP
struct sctphdr *hdr;
int hdrsize = 8;
/* This could be an inner header returned in imcp packet; in such
* cases we cannot update the checksum field since it is outside
* of the 8 bytes of transport layer headers we are guaranteed.
*/
if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr);
if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
hdr->source = tuple->src.u.sctp.port;
} else {
/* Get rid of dst port */
hdr->dest = tuple->dst.u.sctp.port;
}
if (hdrsize < sizeof(*hdr))
return true;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
hdr->checksum = sctp_compute_cksum(skb, hdroff);
skb->ip_summed = CHECKSUM_NONE;
}
#endif
return true;
}
static bool
tcp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct tcphdr *hdr;
__be16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
/* Get rid of src port */
newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst port */
newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
static bool
dccp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_DCCP
struct dccp_hdr *hdr;
__be16 *portptr, oldport, newport;
int hdrsize = 8; /* DCCP connection tracking guarantees this much */
if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr);
if (skb_ensure_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
newport = tuple->src.u.dccp.port;
portptr = &hdr->dccph_sport;
} else {
newport = tuple->dst.u.dccp.port;
portptr = &hdr->dccph_dport;
}
oldport = *portptr;
*portptr = newport;
if (hdrsize < sizeof(*hdr))
return true;
nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
false);
#endif
return true;
}
static bool
icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmphdr *hdr;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmphdr *)(skb->data + hdroff);
switch (hdr->type) {
case ICMP_ECHO:
case ICMP_ECHOREPLY:
case ICMP_TIMESTAMP:
case ICMP_TIMESTAMPREPLY:
case ICMP_INFO_REQUEST:
case ICMP_INFO_REPLY:
case ICMP_ADDRESS:
case ICMP_ADDRESSREPLY:
break;
default:
return true;
}
inet_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, false);
hdr->un.echo.id = tuple->src.u.icmp.id;
return true;
}
static bool
icmpv6_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct icmp6hdr *hdr;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
}
/* manipulate a GRE packet according to maniptype */
static bool
gre_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
const struct gre_base_hdr *greh;
struct pptp_gre_header *pgreh;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false;
greh = (void *)skb->data + hdroff;
pgreh = (struct pptp_gre_header *)greh;
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->flags & GRE_VERSION) {
case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
* Try to behave like "nf_nat_proto_unknown" */
break;
case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
pr_debug("can't nat unknown GRE version\n");
return false;
}
#endif
return true;
}
static bool l4proto_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
switch (tuple->dst.protonum) {
case IPPROTO_TCP:
return tcp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDP:
return udp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDPLITE:
return udplite_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_SCTP:
return sctp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMP:
return icmp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMPV6:
return icmpv6_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_DCCP:
return dccp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_GRE:
return gre_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
}
/* If we don't know protocol -- no error, pass it unmodified. */
return true;
}
static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
struct iphdr *iph;
unsigned int hdroff;
if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
return false;
iph = (void *)skb->data + iphdroff;
hdroff = iphdroff + iph->ihl * 4;
if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
return false;
iph = (void *)skb->data + iphdroff;
if (maniptype == NF_NAT_MANIP_SRC) {
csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
iph->saddr = target->src.u3.ip;
} else {
csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
iph->daddr = target->dst.u3.ip;
}
return true;
}
static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6hdr *ipv6h;
__be16 frag_off;
int hdroff;
u8 nexthdr;
if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
return false;
ipv6h = (void *)skb->data + iphdroff;
nexthdr = ipv6h->nexthdr;
hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
&nexthdr, &frag_off);
if (hdroff < 0)
goto manip_addr;
if ((frag_off & htons(~0x7)) == 0 &&
!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
return false;
/* must reload, offset might have changed */
ipv6h = (void *)skb->data + iphdroff;
manip_addr:
if (maniptype == NF_NAT_MANIP_SRC)
ipv6h->saddr = target->src.u3.in6;
else
ipv6h->daddr = target->dst.u3.in6;
#endif
return true;
}
unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
enum nf_nat_manip_type mtype,
enum ip_conntrack_dir dir)
{
struct nf_conntrack_tuple target;
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
switch (target.src.l3num) {
case NFPROTO_IPV6:
if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
return NF_ACCEPT;
break;
case NFPROTO_IPV4:
if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
return NF_ACCEPT;
break;
default:
WARN_ON_ONCE(1);
break;
}
return NF_DROP;
}
static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
unsigned int iphdroff, __sum16 *check,
const struct nf_conntrack_tuple *t,
enum nf_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
__be32 oldip, newip;
if (maniptype == NF_NAT_MANIP_SRC) {
oldip = iph->saddr;
newip = t->src.u3.ip;
} else {
oldip = iph->daddr;
newip = t->dst.u3.ip;
}
inet_proto_csum_replace4(check, skb, oldip, newip, true);
}
static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
unsigned int iphdroff, __sum16 *check,
const struct nf_conntrack_tuple *t,
enum nf_nat_manip_type maniptype)
{
#if IS_ENABLED(CONFIG_IPV6)
const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
const struct in6_addr *oldip, *newip;
if (maniptype == NF_NAT_MANIP_SRC) {
oldip = &ipv6h->saddr;
newip = &t->src.u3.in6;
} else {
oldip = &ipv6h->daddr;
newip = &t->dst.u3.in6;
}
inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
newip->s6_addr32, true);
#endif
}
static void nf_csum_update(struct sk_buff *skb,
unsigned int iphdroff, __sum16 *check,
const struct nf_conntrack_tuple *t,
enum nf_nat_manip_type maniptype)
{
switch (t->src.l3num) {
case NFPROTO_IPV4:
nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
return;
case NFPROTO_IPV6:
nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
return;
}
}
static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
u8 proto, void *data, __sum16 *check,
int datalen, int oldlen)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
const struct iphdr *iph = ip_hdr(skb);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
ip_hdrlen(skb);
skb->csum_offset = (void *)check - data;
*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
proto, 0);
} else {
inet_proto_csum_replace2(check, skb,
htons(oldlen), htons(datalen), true);
}
}
#if IS_ENABLED(CONFIG_IPV6)
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
u8 proto, void *data, __sum16 *check,
int datalen, int oldlen)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
(data - (void *)skb->data);
skb->csum_offset = (void *)check - data;
*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
datalen, proto, 0);
} else {
inet_proto_csum_replace2(check, skb,
htons(oldlen), htons(datalen), true);
}
}
#endif
void nf_nat_csum_recalc(struct sk_buff *skb,
u8 nfproto, u8 proto, void *data, __sum16 *check,
int datalen, int oldlen)
{
switch (nfproto) {
case NFPROTO_IPV4:
nf_nat_ipv4_csum_recalc(skb, proto, data, check,
datalen, oldlen);
return;
#if IS_ENABLED(CONFIG_IPV6)
case NFPROTO_IPV6:
nf_nat_ipv6_csum_recalc(skb, proto, data, check,
datalen, oldlen);
return;
#endif
}
WARN_ON_ONCE(1);
}
int nf_nat_icmp_reply_translation(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum)
{
struct {
struct icmphdr icmp;
struct iphdr ip;
} *inside;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
unsigned int hdrlen = ip_hdrlen(skb);
struct nf_conntrack_tuple target;
unsigned long statusbit;
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_ICMP))
return 0;
inside = (void *)skb->data + hdrlen;
if (inside->icmp.type == ICMP_REDIRECT) {
if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
return 0;
if (ct->status & IPS_NAT_MASK)
return 0;
}
if (manip == NF_NAT_MANIP_SRC)
statusbit = IPS_SRC_NAT;
else
statusbit = IPS_DST_NAT;
/* Invert if this is reply direction */
if (dir == IP_CT_DIR_REPLY)
statusbit ^= IPS_NAT_MASK;
if (!(ct->status & statusbit))
return 1;
if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt may reallocate */
inside = (void *)skb->data + hdrlen;
inside->icmp.checksum = 0;
inside->icmp.checksum =
csum_fold(skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
/* Change outer to look like the reply to an incoming packet */
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
target.dst.protonum = IPPROTO_ICMP;
if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
static unsigned int
nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return NF_ACCEPT;
if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
state->hook))
return NF_DROP;
else
return NF_ACCEPT;
}
}
return nf_nat_inet_fn(priv, skb, state);
}
static unsigned int
nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
ret = nf_nat_ipv4_fn(priv, skb, state);
if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
return ret;
}
static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
int err;
#endif
unsigned int ret;
ret = nf_nat_ipv4_fn(priv, skb, state);
#ifdef CONFIG_XFRM
if (ret != NF_ACCEPT)
return ret;
if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.src.u3.ip !=
ct->tuplehash[!dir].tuple.dst.u3.ip ||
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)) {
err = nf_xfrm_me_harder(state->net, skb, AF_INET);
if (err < 0)
ret = NF_DROP_ERR(err);
}
}
#endif
return ret;
}
static unsigned int
nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
int err;
ret = nf_nat_ipv4_fn(priv, skb, state);
if (ret != NF_ACCEPT)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
err = nf_xfrm_me_harder(state->net, skb, AF_INET);
if (err < 0)
ret = NF_DROP_ERR(err);
}
#endif
}
return ret;
}
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv4_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv4_out,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv4_local_fn,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv4_fn,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
};
int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
{
return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv4_ops,
ARRAY_SIZE(nf_nat_ipv4_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
{
nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
#if IS_ENABLED(CONFIG_IPV6)
int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
unsigned int hdrlen)
{
struct {
struct icmp6hdr icmp6;
struct ipv6hdr ip6;
} *inside;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
struct nf_conntrack_tuple target;
unsigned long statusbit;
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0;
if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
return 0;
inside = (void *)skb->data + hdrlen;
if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
return 0;
if (ct->status & IPS_NAT_MASK)
return 0;
}
if (manip == NF_NAT_MANIP_SRC)
statusbit = IPS_SRC_NAT;
else
statusbit = IPS_DST_NAT;
/* Invert if this is reply direction */
if (dir == IP_CT_DIR_REPLY)
statusbit ^= IPS_NAT_MASK;
if (!(ct->status & statusbit))
return 1;
if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
&ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
inside = (void *)skb->data + hdrlen;
inside->icmp6.icmp6_cksum = 0;
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
target.dst.protonum = IPPROTO_ICMPV6;
if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
static unsigned int
nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be16 frag_off;
int hdrlen;
u8 nexthdr;
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
* have dropped it. Hence it's the user's responsibilty to
* packet filter it out, or implement conntrack/NAT for that
* protocol. 8) --RR
*/
if (!ct)
return NF_ACCEPT;
if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
nexthdr = ipv6_hdr(skb)->nexthdr;
hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
state->hook,
hdrlen))
return NF_DROP;
else
return NF_ACCEPT;
}
}
return nf_nat_inet_fn(priv, skb, state);
}
static unsigned int
nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
return ret;
}
static unsigned int
nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
int err;
#endif
unsigned int ret;
ret = nf_nat_ipv6_fn(priv, skb, state);
#ifdef CONFIG_XFRM
if (ret != NF_ACCEPT)
return ret;
if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3) ||
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)) {
err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
}
#endif
return ret;
}
static unsigned int
nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
int err;
ret = nf_nat_ipv6_fn(priv, skb, state);
if (ret != NF_ACCEPT)
return ret;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
err = nf_ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
#endif
}
return ret;
}
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv6_in,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv6_out,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
/* Before packet filtering, change destination */
{
.hook = nf_nat_ipv6_local_fn,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = nf_nat_ipv6_fn,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
},
};
int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
{
return nf_nat_register_fn(net, ops->pf, ops, nf_nat_ipv6_ops,
ARRAY_SIZE(nf_nat_ipv6_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
{
nf_nat_unregister_fn(net, ops->pf, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
#endif /* CONFIG_IPV6 */
#if defined(CONFIG_NF_TABLES_INET) && IS_ENABLED(CONFIG_NFT_NAT)
int nf_nat_inet_register_fn(struct net *net, const struct nf_hook_ops *ops)
{
int ret;
if (WARN_ON_ONCE(ops->pf != NFPROTO_INET))
return -EINVAL;
ret = nf_nat_register_fn(net, NFPROTO_IPV6, ops, nf_nat_ipv6_ops,
ARRAY_SIZE(nf_nat_ipv6_ops));
if (ret)
return ret;
ret = nf_nat_register_fn(net, NFPROTO_IPV4, ops, nf_nat_ipv4_ops,
ARRAY_SIZE(nf_nat_ipv4_ops));
if (ret)
nf_nat_ipv6_unregister_fn(net, ops);
return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_inet_register_fn);
void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
{
nf_nat_unregister_fn(net, NFPROTO_IPV4, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
nf_nat_unregister_fn(net, NFPROTO_IPV6, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
}
EXPORT_SYMBOL_GPL(nf_nat_inet_unregister_fn);
#endif /* NFT INET NAT */