Merge branch 'csums-next'

Tom Herbert says:

====================
net: Checksum offload changes - Part VI

I am working on overhauling RX checksum offload. Goals of this effort
are:

- Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY
- Preserve CHECKSUM_COMPLETE through encapsulation layers
- Don't do skb_checksum more than once per packet
- Unify GRO and non-GRO csum verification as much as possible
- Unify the checksum functions (checksum_init)
- Simplify code

What is in this sixth patch set:

- Clarify the specific requirements of devices returning
  CHECKSUM_UNNECESSARY (comments in skbuff.h).
- Add csum_level field to skbuff. This is used to express how
  many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1).
- Change __skb_checksum_validate_needed to "consume" each checksum
  as indicated by csum_level as layers of the the packet are parsed.
- Remove skb_pop_rcv_encapsulation, no longer needed in the new
  csum_level model.
- Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY
  and to report new verified checksums for use in normal path fallback.
- Add proper support to SCTP to accept CHECKSUM_UNNECESSARY to validate
  header CRC.
- Modify drivers to set skb->csum_level instead of setting
  skb->encapsulation to indicate validation of an encapsulated
  checksum on receive.

v2:

Allocate a new 16 bits for flags in skbuff.

Please review carefully and test if possible, mucking with basic
checksum functions is always a little precarious :-)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-08-29 20:41:17 -07:00
commit 030824e039
13 changed files with 101 additions and 64 deletions

View file

@ -1683,7 +1683,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
if (netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
skb->encapsulation = rxcp->tunneled;
skb->csum_level = rxcp->tunneled;
skb_mark_napi_id(skb, napi);
if (rxcp->vlanf)
@ -1741,7 +1741,7 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
if (adapter->netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
skb->encapsulation = rxcp->tunneled;
skb->csum_level = rxcp->tunneled;
skb_mark_napi_id(skb, napi);
if (rxcp->vlanf)

View file

@ -1241,7 +1241,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
skb->ip_summed = CHECKSUM_NONE;
/* Rx csum enabled and ip headers found? */
@ -1315,6 +1314,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
}
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ipv4_tunnel || ipv6_tunnel;
return;

View file

@ -746,7 +746,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
skb->ip_summed = CHECKSUM_NONE;
/* Rx csum enabled and ip headers found? */
@ -820,6 +819,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
}
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ipv4_tunnel || ipv6_tunnel;
return;

View file

@ -769,7 +769,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
gro_skb->ip_summed = CHECKSUM_UNNECESSARY;
if (l2_tunnel)
gro_skb->encapsulation = 1;
gro_skb->csum_level = 1;
if ((cqe->vlan_my_qpn &
cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
@ -823,8 +823,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
skb->protocol = eth_type_trans(skb, dev);
skb_record_rx_queue(skb, cq->ring);
if (l2_tunnel)
skb->encapsulation = 1;
if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
skb->csum_level = 1;
if (dev->features & NETIF_F_RXHASH)
skb_set_hash(skb,

View file

@ -1753,7 +1753,7 @@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter,
if (qlcnic_encap_length(sts_data[1]) &&
skb->ip_summed == CHECKSUM_UNNECESSARY) {
skb->encapsulation = 1;
skb->csum_level = 1;
adapter->stats.encap_rx_csummed++;
}

View file

@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!vs)
goto drop;
skb_pop_rcv_encapsulation(skb);
vs->rcv(vs, skb, vxh->vx_vni);
return 0;

View file

@ -1883,8 +1883,8 @@ struct napi_gro_cb {
/* GRO checksum is valid */
u8 csum_valid:1;
/* Number encapsulation layers crossed */
u8 encapsulation;
/* Number of checksums via CHECKSUM_UNNECESSARY */
u8 csum_cnt:3;
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
__sum16 check)
{
return (skb->ip_summed != CHECKSUM_PARTIAL &&
(skb->ip_summed != CHECKSUM_UNNECESSARY ||
(NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
(!zero_okay || check));
}
@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
return __skb_gro_checksum_complete(skb);
}
/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
* checksum or an encapsulated one during GRO. This saves work
* if we fallback to normal path with the packet.
*/
static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
if (NAPI_GRO_CB(skb)->encapsulation)
skb->encapsulation = 1;
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->encapsulation = 0;
if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
/* Consume a checksum from CHECKSUM_UNNECESSARY */
NAPI_GRO_CB(skb)->csum_cnt--;
} else {
/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
* verified a new top level checksum or an encapsulated one
* during GRO. This saves work if we fallback to normal path.
*/
__skb_incr_checksum_unnecessary(skb);
}
}

View file

@ -47,11 +47,29 @@
*
* The hardware you're dealing with doesn't calculate the full checksum
* (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
* for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
* set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
* undefined in this case though. It is a bad option, but, unfortunately,
* nowadays most vendors do this. Apparently with the secret goal to sell
* you new devices, when you will add new protocol to your host, f.e. IPv6 8)
* for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
* if their checksums are okay. skb->csum is still undefined in this case
* though. It is a bad option, but, unfortunately, nowadays most vendors do
* this. Apparently with the secret goal to sell you new devices, when you
* will add new protocol to your host, f.e. IPv6 8)
*
* CHECKSUM_UNNECESSARY is applicable to following protocols:
* TCP: IPv6 and IPv4.
* UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
* zero UDP checksum for either IPv4 or IPv6, the networking stack
* may perform further validation in this case.
* GRE: only if the checksum is present in the header.
* SCTP: indicates the CRC in SCTP header has been validated.
*
* skb->csum_level indicates the number of consecutive checksums found in
* the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
* For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
* and a device is able to verify the checksums for UDP (possibly zero),
* GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
* two. If the device were only able to verify the UDP checksum and not
* GRE, either because it doesn't support GRE checksum or because GRE
* checksum is bad, skb->csum_level would be set to zero (TCP checksum is
* not considered in this case).
*
* CHECKSUM_COMPLETE:
*
@ -112,6 +130,9 @@
#define CHECKSUM_COMPLETE 2
#define CHECKSUM_PARTIAL 3
/* Maximum value in skb->csum_level */
#define SKB_MAX_CSUM_LEVEL 3
#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
#define SKB_WITH_OVERHEAD(X) \
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@ -571,11 +592,7 @@ struct sk_buff {
__u8 wifi_acked:1;
__u8 no_fcs:1;
__u8 head_frag:1;
/* Encapsulation protocol and NIC drivers should use
* this flag to indicate to each other if the skb contains
* encapsulated packet or not and maybe use the inner packet
* headers if needed
*/
/* Indicates the inner headers are valid in the skbuff. */
__u8 encapsulation:1;
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
@ -598,6 +615,11 @@ struct sk_buff {
__u32 reserved_tailroom;
};
kmemcheck_bitfield_begin(flags3);
__u8 csum_level:2;
/* 14 bit hole */
kmemcheck_bitfield_end(flags3);
__be16 inner_protocol;
__u16 inner_transport_header;
__u16 inner_network_header;
@ -1862,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
return pskb_may_pull(skb, skb_network_offset(skb) + len);
}
static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
{
/* Only continue with checksum unnecessary if device indicated
* it is valid across encapsulation (skb->encapsulation was set).
*/
if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
skb->ip_summed = CHECKSUM_NONE;
skb->encapsulation = 0;
skb->csum_valid = 0;
}
/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
@ -2794,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
0 : __skb_checksum_complete(skb);
}
/* Consume one checksum from an skb marked CHECKSUM_UNNECESSARY.
 *
 * Does nothing unless skb->ip_summed is CHECKSUM_UNNECESSARY. Otherwise
 * a non-zero skb->csum_level is decremented; when csum_level is already
 * zero, skb->ip_summed is set to CHECKSUM_NONE instead.
 */
static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
{
	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
		return;

	if (skb->csum_level != 0)
		skb->csum_level--;
	else
		skb->ip_summed = CHECKSUM_NONE;
}
/* Record one more verified checksum on an skb.
 *
 * - ip_summed == CHECKSUM_UNNECESSARY: csum_level is incremented, but
 *   never beyond SKB_MAX_CSUM_LEVEL.
 * - ip_summed == CHECKSUM_NONE: ip_summed becomes CHECKSUM_UNNECESSARY
 *   and csum_level is reset to zero.
 * - any other ip_summed value: the skb is left unchanged.
 */
static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
{
	switch (skb->ip_summed) {
	case CHECKSUM_UNNECESSARY:
		/* Saturate rather than wrap at the maximum level. */
		if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
			skb->csum_level++;
		break;
	case CHECKSUM_NONE:
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = 0;
		break;
	default:
		break;
	}
}
/* Check if we need to perform checksum complete validation.
*
* Returns true if checksum complete is needed, false otherwise
@ -2805,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
{
if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
skb->csum_valid = 1;
__skb_decr_checksum_unnecessary(skb);
return false;
}

View file

@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
gro_list_prepare(napi, skb);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
} else {
NAPI_GRO_CB(skb)->csum_valid = 0;
}
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
if (ptype->type != type || !ptype->callbacks.gro_receive)
@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
NAPI_GRO_CB(skb)->encapsulation = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
NAPI_GRO_CB(skb)->csum = skb->csum;
NAPI_GRO_CB(skb)->csum_valid = 1;
NAPI_GRO_CB(skb)->csum_cnt = 0;
break;
case CHECKSUM_UNNECESSARY:
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
NAPI_GRO_CB(skb)->csum_valid = 0;
break;
default:
NAPI_GRO_CB(skb)->csum_cnt = 0;
NAPI_GRO_CB(skb)->csum_valid = 0;
}
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;

View file

@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*csum_err = true;
return -EINVAL;
}
skb_pop_rcv_encapsulation(skb);
options++;
}

View file

@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
}
/* Don't bother verifying checksum if we're going to flush anyway. */
if (greh->flags & GRE_CSUM) {
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_simple_validate(skb))
goto out_unlock;
NAPI_GRO_CB(skb)->encapsulation++;
}
flush = 0;

View file

@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
int flush = 1;
if (NAPI_GRO_CB(skb)->udp_mark ||
(!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid))
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid))
goto out;
/* mark that this skb passed once through the udp gro layer */
NAPI_GRO_CB(skb)->udp_mark = 1;
NAPI_GRO_CB(skb)->encapsulation++;
rcu_read_lock();
uo_priv = rcu_dereference(udp_offload_base);

View file

@ -133,9 +133,13 @@ int sctp_rcv(struct sk_buff *skb)
__skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
sctp_rcv_checksum(net, skb) < 0)
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
skb_pull(skb, sizeof(struct sctphdr));