Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/net-next

Eric W. Biederman says:

====================
net: Pass net through ip fragmentation

This is the next installment of my work to pass struct net through the
output path so the code does not need to guess how to figure out which
network namespace it is in, and ultimately routes can have output
devices in another network namespace.

This round focuses on passing net through ip fragmentation which we seem
to call from about everywhere.  That is the main ip output paths, the
bridge netfilter code, and openvswitch.  This has to happen at once
across the tree as function pointers are involved.

First some prep work is done, then ipv4 and ipv6 are converted and then
temporary helper functions are removed.
====================

Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-10-05 03:39:31 -07:00
commit 40e106801e
8 changed files with 53 additions and 56 deletions

View file

@ -17,8 +17,8 @@ struct nf_ipv6_ops {
int (*chk_addr)(struct net *net, const struct in6_addr *addr, int (*chk_addr)(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict); const struct net_device *dev, int strict);
void (*route_input)(struct sk_buff *skb); void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct sock *sk, struct sk_buff *skb, int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
}; };
#ifdef CONFIG_NETFILTER #ifdef CONFIG_NETFILTER

View file

@ -109,8 +109,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_do_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
void ip_send_check(struct iphdr *ip); void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb); int __ip_local_out(struct sk_buff *skb);
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);

View file

@ -173,8 +173,8 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
} }
int ip6_fragment(struct sock *sk, struct sk_buff *skb, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
static inline int ip6_skb_dst_mtu(struct sk_buff *skb) static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{ {

View file

@ -691,17 +691,12 @@ static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff
nf_bridge_info_free(skb); nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb); return br_dev_queue_push_xmit(net, sk, skb);
} }
static int br_nf_push_frag_xmit_sk(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
return br_nf_push_frag_xmit(net, sk, skb);
}
#endif #endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
static int static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
unsigned int mtu = ip_skb_dst_mtu(skb); unsigned int mtu = ip_skb_dst_mtu(skb);
struct iphdr *iph = ip_hdr(skb); struct iphdr *iph = ip_hdr(skb);
@ -714,7 +709,7 @@ br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return -EMSGSIZE; return -EMSGSIZE;
} }
return ip_do_fragment(sk, skb, output); return ip_do_fragment(net, sk, skb, output);
} }
#endif #endif
@ -763,7 +758,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
skb_copy_from_linear_data_offset(skb, -data->size, data->mac, skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size); data->size);
return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit_sk); return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
} }
#endif #endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@ -786,7 +781,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
data->size); data->size);
if (v6ops) if (v6ops)
return v6ops->fragment(sk, skb, br_nf_push_frag_xmit_sk); return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
kfree_skb(skb); kfree_skb(skb);
return -EMSGSIZE; return -EMSGSIZE;

View file

@ -83,9 +83,10 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl); EXPORT_SYMBOL(sysctl_ip_default_ttl);
static int ip_fragment(struct sock *sk, struct sk_buff *skb, static int
ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
unsigned int mtu, unsigned int mtu,
int (*output)(struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
/* Generate a checksum for an outgoing IP datagram. */ /* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph) void ip_send_check(struct iphdr *iph)
@ -176,12 +177,11 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
} }
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct sock *sk, struct sk_buff *skb) static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{ {
struct dst_entry *dst = skb_dst(skb); struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst; struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev; struct net_device *dev = dst->dev;
struct net *net = dev_net(dev);
unsigned int hh_len = LL_RESERVED_SPACE(dev); unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh; struct neighbour *neigh;
u32 nexthop; u32 nexthop;
@ -225,8 +225,8 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
return -EINVAL; return -EINVAL;
} }
static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, static int ip_finish_output_gso(struct net *net, struct sock *sk,
unsigned int mtu) struct sk_buff *skb, unsigned int mtu)
{ {
netdev_features_t features; netdev_features_t features;
struct sk_buff *segs; struct sk_buff *segs;
@ -235,7 +235,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
/* common case: locally created skb or seglen is <= mtu */ /* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
skb_gso_network_seglen(skb) <= mtu) skb_gso_network_seglen(skb) <= mtu)
return ip_finish_output2(sk, skb); return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU. /* Slowpath - GSO segment length is exceeding the dst MTU.
* *
@ -258,7 +258,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
int err; int err;
segs->next = NULL; segs->next = NULL;
err = ip_fragment(sk, segs, mtu, ip_finish_output2); err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
if (err && ret == 0) if (err && ret == 0)
ret = err; ret = err;
@ -281,12 +281,12 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
#endif #endif
mtu = ip_skb_dst_mtu(skb); mtu = ip_skb_dst_mtu(skb);
if (skb_is_gso(skb)) if (skb_is_gso(skb))
return ip_finish_output_gso(sk, skb, mtu); return ip_finish_output_gso(net, sk, skb, mtu);
if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
return ip_fragment(sk, skb, mtu, ip_finish_output2); return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(sk, skb); return ip_finish_output2(net, sk, skb);
} }
int ip_mc_output(struct sock *sk, struct sk_buff *skb) int ip_mc_output(struct sock *sk, struct sk_buff *skb)
@ -495,20 +495,18 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from); skb_copy_secmark(to, from);
} }
static int ip_fragment(struct sock *sk, struct sk_buff *skb, static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
unsigned int mtu, unsigned int mtu,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
struct iphdr *iph = ip_hdr(skb); struct iphdr *iph = ip_hdr(skb);
if ((iph->frag_off & htons(IP_DF)) == 0) if ((iph->frag_off & htons(IP_DF)) == 0)
return ip_do_fragment(sk, skb, output); return ip_do_fragment(net, sk, skb, output);
if (unlikely(!skb->ignore_df || if (unlikely(!skb->ignore_df ||
(IPCB(skb)->frag_max_size && (IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) { IPCB(skb)->frag_max_size > mtu))) {
struct net *net = dev_net(skb_rtable(skb)->dst.dev);
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu)); htonl(mtu));
@ -516,7 +514,7 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb,
return -EMSGSIZE; return -EMSGSIZE;
} }
return ip_do_fragment(sk, skb, output); return ip_do_fragment(net, sk, skb, output);
} }
/* /*
@ -526,8 +524,8 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb,
* single device frame, and queue such a frame for sending. * single device frame, and queue such a frame for sending.
*/ */
int ip_do_fragment(struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
struct iphdr *iph; struct iphdr *iph;
int ptr; int ptr;
@ -537,11 +535,9 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int offset; int offset;
__be16 not_last_frag; __be16 not_last_frag;
struct rtable *rt = skb_rtable(skb); struct rtable *rt = skb_rtable(skb);
struct net *net;
int err = 0; int err = 0;
dev = rt->dst.dev; dev = rt->dst.dev;
net = dev_net(dev);
/* /*
* Point into the IP datagram header. * Point into the IP datagram header.
@ -631,7 +627,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
ip_send_check(iph); ip_send_check(iph);
} }
err = output(sk, skb); err = output(net, sk, skb);
if (!err) if (!err)
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
@ -771,7 +767,7 @@ slow_path:
ip_send_check(iph); ip_send_check(iph);
err = output(sk, skb2); err = output(net, sk, skb2);
if (err) if (err)
goto fail; goto fail;

View file

@ -56,11 +56,10 @@
#include <net/checksum.h> #include <net/checksum.h>
#include <linux/mroute6.h> #include <linux/mroute6.h>
static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{ {
struct dst_entry *dst = skb_dst(skb); struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev; struct net_device *dev = dst->dev;
struct net *net = dev_net(dev);
struct neighbour *neigh; struct neighbour *neigh;
struct in6_addr *nexthop; struct in6_addr *nexthop;
int ret; int ret;
@ -126,9 +125,9 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) || dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(sk, skb, ip6_finish_output2); return ip6_fragment(net, sk, skb, ip6_finish_output2);
else else
return ip6_finish_output2(sk, skb); return ip6_finish_output2(net, sk, skb);
} }
int ip6_output(struct sock *sk, struct sk_buff *skb) int ip6_output(struct sock *sk, struct sk_buff *skb)
@ -554,8 +553,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from); skb_copy_secmark(to, from);
} }
int ip6_fragment(struct sock *sk, struct sk_buff *skb, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
struct sk_buff *frag; struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@ -568,7 +567,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
__be32 frag_id; __be32 frag_id;
int ptr, offset = 0, err = 0; int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0; u8 *prevhdr, nexthdr = 0;
struct net *net = dev_net(skb_dst(skb)->dev);
hlen = ip6_find_1stfragopt(skb, &prevhdr); hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr; nexthdr = *prevhdr;
@ -688,7 +686,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
ip6_copy_metadata(frag, skb); ip6_copy_metadata(frag, skb);
} }
err = output(sk, skb); err = output(net, sk, skb);
if (!err) if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES); IPSTATS_MIB_FRAGCREATES);
@ -816,7 +814,7 @@ slow_path:
/* /*
* Put this fragment into the sending queue. * Put this fragment into the sending queue.
*/ */
err = output(sk, frag); err = output(net, sk, frag);
if (err) if (err)
goto fail; goto fail;

View file

@ -131,6 +131,13 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb); return xfrm_output(sk, skb);
} }
static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
return x->outer_mode->afinfo->output_finish(sk, skb);
}
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{ {
struct dst_entry *dst = skb_dst(skb); struct dst_entry *dst = skb_dst(skb);
@ -160,8 +167,8 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (x->props.mode == XFRM_MODE_TUNNEL && if (x->props.mode == XFRM_MODE_TUNNEL &&
((skb->len > mtu && !skb_is_gso(skb)) || ((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)))) { dst_allfrag(skb_dst(skb)))) {
return ip6_fragment(sk, skb, return ip6_fragment(net, sk, skb,
x->outer_mode->afinfo->output_finish); __xfrm6_output_finish);
} }
return x->outer_mode->afinfo->output_finish(sk, skb); return x->outer_mode->afinfo->output_finish(sk, skb);
} }

View file

@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0; return 0;
} }
static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{ {
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport; struct vport *vport = data->vport;
@ -679,8 +679,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
skb_pull(skb, hlen); skb_pull(skb, hlen);
} }
static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, static void ovs_fragment(struct net *net, struct vport *vport,
__be16 ethertype) struct sk_buff *skb, u16 mru, __be16 ethertype)
{ {
if (skb_network_offset(skb) > MAX_L2_LEN) { if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment"); OVS_NLERR(1, "L2 header too long to fragment");
@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_dst); skb_dst_set_noref(skb, &ovs_dst);
IPCB(skb)->frag_max_size = mru; IPCB(skb)->frag_max_size = mru;
ip_do_fragment(skb->sk, skb, ovs_vport_output); ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst); refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) { } else if (ethertype == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
@ -722,7 +722,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_rt.dst); skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru; IP6CB(skb)->frag_max_size = mru;
v6ops->fragment(skb->sk, skb, ovs_vport_output); v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst); refdst_drop(orig_dst);
} else { } else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
@ -743,6 +743,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb); ovs_vport_send(vport, skb);
} else if (mru <= vport->dev->mtu) { } else if (mru <= vport->dev->mtu) {
struct net *net = read_pnet(&dp->net);
__be16 ethertype = key->eth.type; __be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) { if (!is_flow_key_valid(key)) {
@ -752,7 +753,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ethertype = vlan_get_protocol(skb); ethertype = vlan_get_protocol(skb);
} }
ovs_fragment(vport, skb, mru, ethertype); ovs_fragment(net, vport, skb, mru, ethertype);
} else { } else {
kfree_skb(skb); kfree_skb(skb);
} }