diff --git a/include/linux/udp.h b/include/linux/udp.h index eaea63bc79bb..ca840345571b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,6 +55,7 @@ struct udp_sock { * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ + __u16 gso_size; /* * Fields specific to UDP-Lite. */ @@ -87,6 +88,8 @@ struct udp_sock { int forward_deficit; }; +#define UDP_MAX_SEGMENTS (1 << 6UL) + static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 0a671c32d6b9..83d5b3c2ac42 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -147,6 +147,7 @@ struct inet_cork { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; struct inet_cork_full { diff --git a/include/net/ip.h b/include/net/ip.h index 7ec543a64bbc..bada1f1f871e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0dd722cab037..0a872a7c33c8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,6 +298,7 @@ struct ipcm6_cookie { __s16 tclass; __s8 dontfrag; struct ipv6_txoptions *opt; + __u16 gso_size; }; static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index efb7b5991c2f..09d00f8c442b 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -32,6 +32,7 @@ struct udphdr { #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_SEGMENT 103 /* Set GSO segmentation size */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2883ff1e909c..da4abbee10f7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -882,7 +882,8 @@ static int __ip_append_data(struct sock *sk, skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -906,7 +907,7 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -1135,6 +1136,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, *rtp = NULL; cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : rt->dst.dev->mtu; + + cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; @@ -1214,7 +1217,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EOPNOTSUPP; hh_len = LL_RESERVED_SPACE(rt->dst.dev); - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b9d8017b319..bda022c5480b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, } EXPORT_SYMBOL(udp_set_csum); -static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) +static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, + struct inet_cork *cork) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -777,6 +778,21 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->len = htons(len); uh->check = 0; + if (cork->gso_size) { + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + + if (hlen + cork->gso_size > cork->fragsize) + return -EINVAL; + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + return -EIO; + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + } + if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); @@ -828,7 +844,7 @@ int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &inet->cork.base); out: up->len = 0; @@ -922,6 +938,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; + ipc.gso_size = up->gso_size; if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); @@ -1037,7 +1054,7 @@ back_from_confirm: &cork, msg->msg_flags); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &cork); goto out; } @@ -2367,6 +2384,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->no_check6_rx = valbool; break; + case UDP_SEGMENT: + if (val < 0 || val > USHRT_MAX) + return -EINVAL; + up->gso_size = val; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2457,6 +2480,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->no_check6_rx; break; + case UDP_SEGMENT: + val = up->gso_size; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7fa1db447405..a1c4a78132d2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1240,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (mtu < IPV6_MIN_MTU) return -EINVAL; cork->base.fragsize = mtu; + cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0; + if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; @@ -1281,7 +1283,7 @@ static int __ip6_append_data(struct sock *sk, dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } - mtu = cork->fragsize; + mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1329,7 +1331,7 @@ emsgsize: if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && length <= mtu - headersize && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 824797f8d1ab..86b7dd58d4b4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, + struct inet_cork *cork) { struct sock *sk = skb->sk; struct udphdr *uh; @@ -1042,6 +1043,21 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) uh->len = htons(len); uh->check = 0; + if (cork->gso_size) { + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + + if (hlen + cork->gso_size > cork->fragsize) + return -EINVAL; + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + return -EIO; + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + } + if (is_udplite) csum = udplite_csum(skb); else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ @@ -1093,7 +1109,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); out: up->len = 0; @@ -1127,6 +1143,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + ipc6.gso_size = up->gso_size; sockc.tsflags = sk->sk_tsflags; /* destination address check */ @@ -1333,7 +1350,7 @@ back_from_confirm: msg->msg_flags, &cork, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &cork.base); goto out; }