diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 98f65ed8f8b0..f40f0ab3847a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -352,6 +352,7 @@ enum gro_result { GRO_HELD, GRO_NORMAL, GRO_DROP, + GRO_CONSUMED, }; typedef enum gro_result gro_result_t; @@ -2667,6 +2668,19 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +#endif + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9a81dcef53e..14d82bf16692 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -280,9 +280,7 @@ struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { - unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, @@ -303,8 +301,8 @@ struct xfrm_policy_afinfo { struct dst_entry *(*blackhole_route)(struct net *net, struct dst_entry *orig); }; -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family); +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); void km_state_notify(struct xfrm_state *x, const struct km_event *c); @@ -349,13 +347,12 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { unsigned int family; - struct module *owner; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo); +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -501,6 +498,7 @@ struct xfrm_tmpl { }; #define XFRM_MAX_DEPTH 6 +#define XFRM_MAX_OFFLOAD_DEPTH 1 struct xfrm_policy_walk_entry { struct list_head all; @@ -684,6 +682,7 @@ struct xfrm_spi_skb_cb { unsigned int daddroff; unsigned int family; + __be32 seq; }; #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) @@ -976,10 +975,41 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); +struct xfrm_offload { + /* Output sequence number for replay protection on offloading. */ + struct { + __u32 low; + __u32 hi; + } seq; + + __u32 flags; +#define SA_DELETE_REQ 1 +#define CRYPTO_DONE 2 +#define CRYPTO_NEXT_DONE 4 +#define CRYPTO_FALLBACK 8 +#define XFRM_GSO_SEGMENT 16 +#define XFRM_GRO 32 + + __u32 status; +#define CRYPTO_SUCCESS 1 +#define CRYPTO_GENERIC_ERROR 2 +#define CRYPTO_TRANSPORT_AH_AUTH_FAILED 4 +#define CRYPTO_TRANSPORT_ESP_AUTH_FAILED 8 +#define CRYPTO_TUNNEL_AH_AUTH_FAILED 16 +#define CRYPTO_TUNNEL_ESP_AUTH_FAILED 32 +#define CRYPTO_INVALID_PACKET_SYNTAX 64 +#define CRYPTO_INVALID_PROTOCOL 128 + + __u8 proto; +}; + struct sec_path { atomic_t refcnt; int len; + int olen; + struct xfrm_state *xvec[XFRM_MAX_DEPTH]; + struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; static inline int secpath_exists(struct sk_buff *skb) @@ -1009,6 +1039,7 @@ secpath_put(struct sec_path *sp) } struct sec_path *secpath_dup(struct sec_path *src); +int secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) @@ -1170,6 +1201,7 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } void xfrm_garbage_collect(struct net *net); +void xfrm_garbage_collect_deferred(struct net *net); #else @@ -1521,6 +1553,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb); +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { @@ -1776,6 +1809,15 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) +{ + struct sec_path *sp = skb->sp; + + if (!sp || !sp->olen || sp->len != sp->olen) + return NULL; + + return &sp->ovec[sp->olen - 1]; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) diff --git a/net/core/dev.c b/net/core/dev.c index 2f1bbe1bf67c..05d19c6acf94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4510,6 +4510,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (&ptype->list == head) goto normal; + if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { + ret = GRO_CONSUMED; + goto ok; + } + same_flow = NAPI_GRO_CB(skb)->same_flow; ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; @@ -4614,6 +4619,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) case GRO_HELD: case GRO_MERGED: + case GRO_CONSUMED: break; } @@ -4685,6 +4691,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_MERGED: + case GRO_CONSUMED: break; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d21be3b3d9cc..1446810047f5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -475,7 +475,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e30f9caddae8..91a2557942fa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -361,6 +361,19 @@ config INET_ESP If unsure, say Y. +config INET_ESP_OFFLOAD + tristate "IP: ESP transformation offload" + depends on INET_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET_IPCOMP tristate "IP: IPComp transformation" select INET_XFRM_TUNNEL diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 48af58a5686e..c6d4238ff94a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o +obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 685ba53df2d1..602d40f43687 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1423,7 +1423,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c new file mode 100644 index 000000000000..1de442632406 --- /dev/null +++ b/net/ipv4/esp4_offload.c @@ -0,0 +1,106 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * ESP GRO support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct sk_buff **esp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp4_offload = { + .callbacks = { + .gro_receive = esp4_gro_receive, + }, +}; + +static int __init esp4_offload_init(void) +{ + return inet_add_offload(&esp4_offload, IPPROTO_ESP); +} + +static void __exit esp4_offload_exit(void) +{ + inet_del_offload(&esp4_offload, IPPROTO_ESP); +} + +module_init(esp4_offload_init); +module_exit(esp4_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert "); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 62e1e72db461..1fc684111ce6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -40,6 +40,7 @@ drop: int xfrm4_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); struct iphdr *iph = ip_hdr(skb); iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; @@ -53,6 +54,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return 0; + } + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index fd840c7d75ea..4acc0508c5eb 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -43,6 +43,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -50,7 +51,8 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6a7ff6957535..71b4ecc195c7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,8 +17,6 @@ #include #include -static struct xfrm_policy_afinfo xfrm4_policy_afinfo; - static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, @@ -219,7 +217,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - xfrm4_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } @@ -271,8 +269,7 @@ static struct dst_ops xfrm4_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { - .family = AF_INET, +static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, @@ -376,7 +373,7 @@ static struct pernet_operations __net_initdata xfrm4_net_ops = { static void __init xfrm4_policy_init(void) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET); } void __init xfrm4_init(void) diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index dccefa9d84cf..8dd0e6ab8606 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -188,9 +188,8 @@ static const struct net_protocol ipcomp4_protocol = { .netns_ok = 1, }; -static struct xfrm_input_afinfo xfrm4_input_afinfo = { +static const struct xfrm_input_afinfo xfrm4_input_afinfo = { .family = AF_INET, - .owner = THIS_MODULE, .callback = xfrm4_rcv_cb, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 3c7c76b2a7ba..e2afe677a9d9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -75,6 +75,19 @@ config INET6_ESP If unsure, say Y. +config INET6_ESP_OFFLOAD + tristate "IPv6: ESP transformation offload" + depends on INET6_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index a9e9fec387ce..217e9ff0e24b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -30,6 +30,7 @@ ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o +obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c new file mode 100644 index 000000000000..d914eb93204a --- /dev/null +++ b/net/ipv6/esp6_offload.c @@ -0,0 +1,108 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * ESP GRO support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct sk_buff **esp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp6_offload = { + .callbacks = { + .gro_receive = esp6_gro_receive, + }, +}; + +static int __init esp6_offload_init(void) +{ + return inet6_add_offload(&esp6_offload, IPPROTO_ESP); +} + +static void __exit esp6_offload_exit(void) +{ + inet6_del_offload(&esp6_offload, IPPROTO_ESP); +} + +module_init(esp6_offload_init); +module_exit(esp6_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert "); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index fc7b4017ba24..0838e6d01d2e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -253,7 +253,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b5789562aded..08a807b29298 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -33,6 +33,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); + skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; @@ -44,6 +46,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return -1; + } + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); @@ -69,18 +76,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int i = 0; - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + if (secpath_set(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 4e344105b3fd..4439ee44c8b0 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -47,6 +47,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -55,7 +56,8 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0f71c01d728..79651bc71bf0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -25,8 +25,6 @@ #include #endif -static struct xfrm_policy_afinfo xfrm6_policy_afinfo; - static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) @@ -220,7 +218,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - xfrm6_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } @@ -291,8 +289,7 @@ static struct dst_ops xfrm6_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { - .family = AF_INET6, +static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, @@ -305,7 +302,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { static int __init xfrm6_policy_init(void) { - return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 54d13f8dbbae..b2dc8ce49378 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -162,9 +162,8 @@ static const struct inet6_protocol ipcomp6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -static struct xfrm_input_afinfo xfrm6_input_afinfo = { +static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, - .owner = THIS_MODULE, .callback = xfrm6_rcv_cb, }; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index c06d3997c6e7..286ed25c1a69 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -6,6 +6,10 @@ config XFRM depends on NET select GRO_CELLS +config XFRM_OFFLOAD + bool + depends on XFRM + config XFRM_ALGO tristate select XFRM diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 3213fe8027be..46bdb4fbed0b 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -19,19 +19,18 @@ static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); -static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; +static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo))) return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) err = -EEXIST; @@ -42,14 +41,10 @@ int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_register_afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) @@ -63,12 +58,13 @@ int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_unregister_afinfo); -static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) { - struct xfrm_input_afinfo *afinfo; + const struct xfrm_input_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo))) return NULL; + rcu_read_lock(); afinfo = rcu_dereference(xfrm_input_afinfo[family]); if (unlikely(!afinfo)) @@ -76,22 +72,17 @@ static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) return afinfo; } -static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, int err) { int ret; - struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; ret = afinfo->callback(skb, protocol, err); - xfrm_input_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -114,6 +105,8 @@ struct sec_path *secpath_dup(struct sec_path *src) return NULL; sp->len = 0; + sp->olen = 0; + if (src) { int i; @@ -126,6 +119,24 @@ struct sec_path *secpath_dup(struct sec_path *src) } EXPORT_SYMBOL(secpath_dup); +int secpath_set(struct sk_buff *skb) +{ + struct sec_path *sp; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + sp = secpath_dup(skb->sp); + if (!sp) + return -ENOMEM; + + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + return 0; +} +EXPORT_SYMBOL(secpath_set); + /* Fetch spi and seq from ipsec header */ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) @@ -161,6 +172,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); return 0; } +EXPORT_SYMBOL(xfrm_parse_spi); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { @@ -195,14 +207,23 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; + struct xfrm_offload *xo; + bool xfrm_gro = false; - /* A negative encap_type indicates async resumption. */ if (encap_type < 0) { - async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input.low; family = x->outer_mode->afinfo->family; - goto resume; + + /* An encap_type of -1 indicates async resumption. */ + if (encap_type == -1) { + async = 1; + seq = XFRM_SKB_CB(skb)->seq.input.low; + goto resume; + } + /* encap_type < -1 indicates a GRO call. */ + encap_type = 0; + seq = XFRM_SPI_SKB_CB(skb)->seq; + goto lock; } daddr = (xfrm_address_t *)(skb_network_header(skb) + @@ -221,18 +242,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) break; } - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + err = secpath_set(skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } seq = 0; @@ -256,6 +269,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; +lock: spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { @@ -377,7 +391,18 @@ resume: gro_cells_receive(&gro_cells, skb); return 0; } else { - return x->inner_mode->afinfo->transport_finish(skb, async); + xo = xfrm_offload(skb); + if (xo) + xfrm_gro = xo->flags & XFRM_GRO; + + err = x->inner_mode->afinfo->transport_finish(skb, async); + if (xfrm_gro) { + skb_dst_drop(skb); + gro_cells_receive(&gro_cells, skb); + return err; + } + + return err; } drop_unlock: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0a0f63d3cc96..5f3e87866438 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,7 +45,7 @@ struct xfrm_flo { }; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] +static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; @@ -103,11 +103,11 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl return false; } -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return NULL; rcu_read_lock(); afinfo = rcu_dereference(xfrm_policy_afinfo[family]); @@ -116,18 +116,13 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) return afinfo; } -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; afinfo = xfrm_policy_get_afinfo(family); @@ -136,7 +131,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return dst; } @@ -1431,12 +1426,12 @@ xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; err = afinfo->get_saddr(net, oif, local, remote); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1538,21 +1533,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, } -/* Check that the bundle accepts the flow and its components are - * still valid. - */ - -static inline int xfrm_get_tos(const struct flowi *fl, int family) +static int xfrm_get_tos(const struct flowi *fl, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int tos; + const struct xfrm_policy_afinfo *afinfo; + int tos = 0; - if (!afinfo) - return -EINVAL; + afinfo = xfrm_policy_get_afinfo(family); + tos = afinfo ? afinfo->get_tos(fl) : 0; - tos = afinfo->get_tos(fl); - - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return tos; } @@ -1609,7 +1598,7 @@ static const struct flow_cache_ops xfrm_bundle_fc_ops = { static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_ops *dst_ops; struct xfrm_dst *xdst; @@ -1638,7 +1627,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) } else xdst = ERR_PTR(-ENOBUFS); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return xdst; } @@ -1646,7 +1635,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(dst->ops->family); int err; @@ -1655,7 +1644,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, err = afinfo->init_path(path, dst, nfheader_len); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1663,7 +1652,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); int err; @@ -1672,7 +1661,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, err = afinfo->fill_dst(xdst, dev, fl); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1705,9 +1694,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); - err = tos; - if (tos < 0) - goto put_states; dst_hold(dst); @@ -2215,7 +2201,7 @@ error: static struct dst_entry *make_blackhole(struct net *net, u16 family, struct dst_entry *dst_orig) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_entry *ret; if (!afinfo) { @@ -2224,7 +2210,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, } else { ret = afinfo->blackhole_route(net, dst_orig); } - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -2466,7 +2452,7 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int err; if (unlikely(afinfo == NULL)) @@ -2474,7 +2460,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, afinfo->decode_session(skb, fl, reverse); err = security_xfrm_decode_session(skb, &fl->flowi_secid); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(__xfrm_decode_session); @@ -2742,10 +2728,11 @@ void xfrm_garbage_collect(struct net *net) } EXPORT_SYMBOL(xfrm_garbage_collect); -static void xfrm_garbage_collect_deferred(struct net *net) +void xfrm_garbage_collect_deferred(struct net *net) { flow_cache_flush_deferred(net); } +EXPORT_SYMBOL(xfrm_garbage_collect_deferred); static void xfrm_init_pmtu(struct dst_entry *dst) { @@ -2873,15 +2860,15 @@ static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) path->ops->confirm_neigh(path, daddr); } -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + + if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return -EAFNOSUPPORT; + spin_lock(&xfrm_policy_afinfo_lock); - if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) + if (unlikely(xfrm_policy_afinfo[family] != NULL)) err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; @@ -2901,9 +2888,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(!dst_ops->confirm_neigh)) dst_ops->confirm_neigh = xfrm_confirm_neigh; - if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = xfrm_garbage_collect_deferred; - rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); + rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); @@ -2911,34 +2896,24 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_register_afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) { - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - spin_lock(&xfrm_policy_afinfo_lock); - if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) - err = -EINVAL; - else - RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], - NULL); - } - spin_unlock(&xfrm_policy_afinfo_lock); - if (!err) { - struct dst_ops *dst_ops = afinfo->dst_ops; + struct dst_ops *dst_ops = afinfo->dst_ops; + int i; - synchronize_rcu(); - - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; + for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { + if (xfrm_policy_afinfo[i] != afinfo) + continue; + RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); + break; } - return err; + + synchronize_rcu(); + + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);