diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d5495d87f399..12f2f725a945 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -89,6 +89,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include #endif @@ -1168,6 +1169,47 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } +static void packet_inc_pending(struct packet_ring_buffer *rb) +{ + this_cpu_inc(*rb->pending_refcnt); +} + +static void packet_dec_pending(struct packet_ring_buffer *rb) +{ + this_cpu_dec(*rb->pending_refcnt); +} + +static unsigned int packet_read_pending(const struct packet_ring_buffer *rb) +{ + unsigned int refcnt = 0; + int cpu; + + /* We don't use pending refcount in rx_ring. */ + if (rb->pending_refcnt == NULL) + return 0; + + for_each_possible_cpu(cpu) + refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); + + return refcnt; +} + +static int packet_alloc_pending(struct packet_sock *po) +{ + po->rx_ring.pending_refcnt = NULL; + + po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); + if (unlikely(po->tx_ring.pending_refcnt == NULL)) + return -ENOBUFS; + + return 0; +} + +static void packet_free_pending(struct packet_sock *po) +{ + free_percpu(po->tx_ring.pending_refcnt); +} + static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) { struct sock *sk = &po->sk; @@ -2014,8 +2056,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) __u32 ts; ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(atomic_read(&po->tx_ring.pending) == 0); - atomic_dec(&po->tx_ring.pending); + packet_dec_pending(&po->tx_ring); ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); @@ -2236,7 +2277,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) skb_set_queue_mapping(skb, packet_pick_tx_queue(dev)); skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); - atomic_inc(&po->tx_ring.pending); + packet_inc_pending(&po->tx_ring); status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); @@ -2256,8 +2297,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } packet_increment_head(&po->tx_ring); len_sum += tp_len; - } while (likely((ph != NULL) || (need_wait && - atomic_read(&po->tx_ring.pending)))); + } while (likely((ph != NULL) || + /* Note: packet_read_pending() might be slow if we have + * to call it as it's per_cpu variable, but in fast-path + * we already short-circuit the loop with the first + * condition, and luckily don't have to go that path + * anyway. + */ + (need_wait && packet_read_pending(&po->tx_ring)))); err = len_sum; goto out_put; @@ -2556,6 +2603,7 @@ static int packet_release(struct socket *sock) /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); + packet_free_pending(po); sk_refcnt_debug_release(sk); sock_put(sk); @@ -2717,6 +2765,10 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->num = proto; po->xmit = dev_queue_xmit; + err = packet_alloc_pending(po); + if (err) + goto out2; + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -2749,6 +2801,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, preempt_enable(); return 0; +out2: + sk_free(sk); out: return err; } @@ -3676,7 +3730,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!closing) { if (atomic_read(&po->mapped)) goto out; - if (atomic_read(&rb->pending)) + if (packet_read_pending(rb)) goto out; } diff --git a/net/packet/diag.c b/net/packet/diag.c index a9584a2f6d69..533ce4ff108a 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/net/packet/internal.h b/net/packet/internal.h index 0a87d7b36c9e..eb9580a6b25f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -64,7 +64,7 @@ struct packet_ring_buffer { unsigned int pg_vec_pages; unsigned int pg_vec_len; - atomic_t pending; + unsigned int __percpu *pending_refcnt; struct tpacket_kbdq_core prb_bdqc; };