remarkable-linux/net/core/datagram.c
Linus Torvalds 8d65b08deb Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Millar:
 "Here are some highlights from the 2065 networking commits that
  happened this development cycle:

   1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri)

   2) Add a generic XDP driver, so that anyone can test XDP even if they
      lack a networking device whose driver has explicit XDP support
      (me).

   3) Sparc64 now has an eBPF JIT too (me)

   4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei
      Starovoitov)

   5) Make netfitler network namespace teardown less expensive (Florian
      Westphal)

   6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana)

   7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger)

   8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky)

   9) Multiqueue support in stmmac driver (Joao Pinto)

  10) Remove TCP timewait recycling, it never really could possibly work
      well in the real world and timestamp randomization really zaps any
      hint of usability this feature had (Soheil Hassas Yeganeh)

  11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay
      Aleksandrov)

  12) Add socket busy poll support to epoll (Sridhar Samudrala)

  13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso,
      and several others)

  14) IPSEC hw offload infrastructure (Steffen Klassert)"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits)
  tipc: refactor function tipc_sk_recv_stream()
  tipc: refactor function tipc_sk_recvmsg()
  net: thunderx: Optimize page recycling for XDP
  net: thunderx: Support for XDP header adjustment
  net: thunderx: Add support for XDP_TX
  net: thunderx: Add support for XDP_DROP
  net: thunderx: Add basic XDP support
  net: thunderx: Cleanup receive buffer allocation
  net: thunderx: Optimize CQE_TX handling
  net: thunderx: Optimize RBDR descriptor handling
  net: thunderx: Support for page recycling
  ipx: call ipxitf_put() in ioctl error path
  net: sched: add helpers to handle extended actions
  qed*: Fix issues in the ptp filter config implementation.
  qede: Fix concurrency issue in PTP Tx path processing.
  stmmac: Add support for SIMATIC IOT2000 platform
  net: hns: fix ethtool_get_strings overflow in hns driver
  tcp: fix wraparound issue in tcp_lp
  bpf, arm64: fix jit branch offset related to ldimm64
  bpf, arm64: implement jiting of BPF_XADD
  ...
2017-05-02 16:40:27 -07:00

845 lines
20 KiB
C

/*
* SUCS NET3:
*
* Generic datagram handling routines. These are generic for all
* protocols. Possibly a generic IP version on top of these would
* make sense. Not tonight however 8-).
* This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
* NetROM layer all have identical poll code and mostly
* identical recvmsg() code. So we share it here. The poll was
* shared before but buried in udp.c so I moved it.
*
* Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
* udp.c code)
*
* Fixes:
* Alan Cox : NULL return from skb_peek_copy()
* understood
* Alan Cox : Rewrote skb_read_datagram to avoid the
* skb_peek_copy stuff.
* Alan Cox : Added support for SOCK_SEQPACKET.
* IPX can no longer use the SO_TYPE hack
* but AX.25 now works right, and SPX is
* feasible.
* Alan Cox : Fixed write poll of non IP protocol
* crash.
* Florian La Roche: Changed for my new skbuff handling.
* Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
* Linus Torvalds : BSD semantic fixes.
* Alan Cox : Datagram iovec handling
* Darryl Miles : Fixed non-blocking SOCK_STREAM.
* Alan Cox : POSIXisms
* Pete Wyckoff : Unconnected accept() fix.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
/*
* Is a socket 'connection oriented' ?
*/
static inline int connection_based(struct sock *sk)
{
return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}
static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
void *key)
{
unsigned long bits = (unsigned long)key;
/*
* Avoid a wakeup if event not interesting for us
*/
if (bits && !(bits & (POLLIN | POLLERR)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
/*
* Wait for the last received packet to be different from skb
*/
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
const struct sk_buff *skb)
{
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
/* Socket errors? */
error = sock_error(sk);
if (error)
goto out_err;
if (sk->sk_receive_queue.prev != skb)
goto out;
/* Socket shut down? */
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto out_noerr;
/* Sequenced packets can come disconnected.
* If so we report the problem
*/
error = -ENOTCONN;
if (connection_based(sk) &&
!(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
goto out_err;
/* handle signals */
if (signal_pending(current))
goto interrupted;
error = 0;
*timeo_p = schedule_timeout(*timeo_p);
out:
finish_wait(sk_sleep(sk), &wait);
return error;
interrupted:
error = sock_intr_errno(*timeo_p);
out_err:
*err = error;
goto out;
out_noerr:
*err = 0;
error = 1;
goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);
static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
struct sk_buff *nskb;
if (skb->peeked)
return skb;
/* We have to unshare an skb before modifying it. */
if (!skb_shared(skb))
goto done;
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb)
return ERR_PTR(-ENOMEM);
skb->prev->next = nskb;
skb->next->prev = nskb;
nskb->prev = skb->prev;
nskb->next = skb->next;
consume_skb(skb);
skb = nskb;
done:
skb->peeked = 1;
return skb;
}
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
* @destructor: invoked under the receive lock on successful dequeue
* @peeked: returns non-zero if this packet has been seen before
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
* @err: error code returned
* @last: set to last peeked message to inform the wait function
* what to look for when peeking
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
* and possible races. This replaces identical code in packet, raw and
* udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
* the long standing peek and read race for datagram sockets. If you
* alter this routine remember it must be re-entrant.
*
* This function will lock the socket if a skb is returned, so
* the caller needs to unlock the socket in that case (usually by
* calling skb_free_datagram). Returns NULL with *err set to
* -EAGAIN if no data was available or to some other value if an
* error was detected.
*
* * It does not lock socket since today. This function is
* * free of race conditions. This measure should/can improve
* * significantly datagram socket latencies at high loads,
* * when data copying to user space takes lots of time.
* * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
* * 8) Great win.)
* * --ANK (980729)
*
* The order of the tests when we find no data waiting are specified
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *peeked, int *off, int *err,
struct sk_buff **last)
{
struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb;
unsigned long cpu_flags;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
*/
int error = sock_error(sk);
if (error)
goto no_packet;
*peeked = 0;
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
int _off = *off;
*last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
skb_queue_walk(queue, skb) {
*last = skb;
if (flags & MSG_PEEK) {
if (_off >= skb->len && (skb->len || _off ||
skb->peeked)) {
_off -= skb->len;
continue;
}
if (!skb->len) {
skb = skb_set_peeked(skb);
if (IS_ERR(skb)) {
error = PTR_ERR(skb);
spin_unlock_irqrestore(&queue->lock,
cpu_flags);
goto no_packet;
}
}
*peeked = 1;
atomic_inc(&skb->users);
} else {
__skb_unlink(skb, queue);
if (destructor)
destructor(sk, skb);
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
*off = _off;
return skb;
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
if (!sk_can_busy_loop(sk))
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
} while (!skb_queue_empty(&sk->sk_receive_queue));
error = -EAGAIN;
no_packet:
*err = error;
return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb),
int *peeked, int *off, int *err)
{
struct sk_buff *skb, *last;
long timeo;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
off, err, &last);
if (skb)
return skb;
if (*err != -EAGAIN)
break;
} while (timeo &&
!__skb_wait_for_more_packets(sk, err, &timeo, last));
return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
int noblock, int *err)
{
int peeked, off = 0;
return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
NULL, &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
consume_skb(skb);
sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
bool slow;
if (likely(atomic_read(&skb->users) == 1))
smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users))) {
sk_peek_offset_bwd(sk, len);
return;
}
slow = lock_sock_fast(sk);
sk_peek_offset_bwd(sk, len);
skb_orphan(skb);
sk_mem_reclaim_partial(sk);
unlock_sock_fast(sk, slow);
/* skb is now orphaned, can be freed outside of locked section */
__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb))
{
int err = 0;
if (flags & MSG_PEEK) {
err = -ENOENT;
spin_lock_bh(&sk->sk_receive_queue.lock);
if (skb == skb_peek(&sk->sk_receive_queue)) {
__skb_unlink(skb, &sk->sk_receive_queue);
atomic_dec(&skb->users);
if (destructor)
destructor(sk, skb);
err = 0;
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
}
atomic_inc(&sk->sk_drops);
return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
* @sk: socket
* @skb: datagram skbuff
* @flags: MSG_ flags
*
* This function frees a datagram skbuff that was received by
* skb_recv_datagram. The flags argument must match the one
* used for skb_recv_datagram.
*
* If the MSG_PEEK flag is set, and the packet is still on the
* receive queue of the socket, it will be taken off the queue
* before it is freed.
*
* This function currently only disables BH when acquiring the
* sk_receive_queue lock. Therefore it must not be used in a
* context where that lock is acquired in an IRQ context.
*
* It returns 0 if the packet was removed by us.
*/
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
kfree_skb(skb);
sk_mem_reclaim_partial(sk);
return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
/**
* skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying from
* @to: iovec iterator to copy to
* @len: amount of data to copy from buffer to iovec
*/
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
trace_skb_copy_datagram_iovec(skb, len);
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
n = copy_to_iter(skb->data + offset, copy, to);
offset += n;
if (n != copy)
goto short_copy;
if ((len -= copy) == 0)
return 0;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
n = copy_page_to_iter(skb_frag_page(frag),
frag->page_offset + offset -
start, copy, to);
offset += n;
if (n != copy)
goto short_copy;
if (!(len -= copy))
return 0;
}
start = end;
}
skb_walk_frags(skb, frag_iter) {
int end;
WARN_ON(start > offset + len);
end = start + frag_iter->len;
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_datagram_iter(frag_iter, offset - start,
to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
start = end;
}
if (!len)
return 0;
/* This is not really a user copy fault, but rather someone
* gave us a bogus length on the skb. We should probably
* print a warning here as it may indicate a kernel bug.
*/
fault:
iov_iter_revert(to, offset - start_off);
return -EFAULT;
short_copy:
if (iov_iter_count(to))
goto fault;
return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
/**
* skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying to
* @from: the copy source
* @len: amount of data to copy to buffer from iovec
*
* Returns 0 or -EFAULT.
*/
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
struct iov_iter *from,
int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
if (copy_from_iter(skb->data + offset, copy, from) != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
size_t copied;
if (copy > len)
copy = len;
copied = copy_page_from_iter(skb_frag_page(frag),
frag->page_offset + offset - start,
copy, from);
if (copied != copy)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
}
start = end;
}
skb_walk_frags(skb, frag_iter) {
int end;
WARN_ON(start > offset + len);
end = start + frag_iter->len;
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_datagram_from_iter(frag_iter,
offset - start,
from, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
start = end;
}
if (!len)
return 0;
fault:
return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
/**
* zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
* @skb: buffer to copy
* @from: the source to copy from
*
* The function will first copy up to headlen, and then pin the userspace
* pages and build frags through them.
*
* Returns 0, -EFAULT or -EMSGSIZE.
*/
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
int len = iov_iter_count(from);
int copy = min_t(int, skb_headlen(skb), len);
int frag = 0;
/* copy up to skb headlen */
if (skb_copy_datagram_from_iter(skb, 0, from, copy))
return -EFAULT;
while (iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
size_t start;
ssize_t copied;
unsigned long truesize;
int n = 0;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
copied = iov_iter_get_pages(from, pages, ~0U,
MAX_SKB_FRAGS - frag, &start);
if (copied < 0)
return -EFAULT;
iov_iter_advance(from, copied);
truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (copied) {
int size = min_t(int, copied, PAGE_SIZE - start);
skb_fill_page_desc(skb, frag++, pages[n], start, size);
start = 0;
copied -= size;
n++;
}
}
return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len,
__wsum *csump)
{
int start = skb_headlen(skb);
int i, copy = start - offset, start_off = offset;
struct sk_buff *frag_iter;
int pos = 0;
int n;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
offset += n;
if (n != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
pos = copy;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
WARN_ON(start > offset + len);
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
__wsum csum2 = 0;
struct page *page = skb_frag_page(frag);
u8 *vaddr = kmap(page);
if (copy > len)
copy = len;
n = csum_and_copy_to_iter(vaddr + frag->page_offset +
offset - start, copy,
&csum2, to);
kunmap(page);
offset += n;
if (n != copy)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
pos += copy;
}
start = end;
}
skb_walk_frags(skb, frag_iter) {
int end;
WARN_ON(start > offset + len);
end = start + frag_iter->len;
if ((copy = end - offset) > 0) {
__wsum csum2 = 0;
if (copy > len)
copy = len;
if (skb_copy_and_csum_datagram(frag_iter,
offset - start,
to, copy,
&csum2))
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if ((len -= copy) == 0)
return 0;
offset += copy;
pos += copy;
}
start = end;
}
if (!len)
return 0;
fault:
iov_iter_revert(to, offset - start_off);
return -EFAULT;
}
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
__sum16 sum;
sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
}
if (!skb_shared(skb))
skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
__wsum csum;
__sum16 sum;
csum = skb_checksum(skb, 0, skb->len, 0);
/* skb->csum holds pseudo checksum */
sum = csum_fold(csum_add(skb->csum, csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
}
if (!skb_shared(skb)) {
/* Save full packet checksum */
skb->csum = csum;
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum_complete_sw = 1;
skb->csum_valid = !sum;
}
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
/**
* skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
* @hlen: hardware length
* @msg: destination
*
* Caller _must_ check that skb will fit to this iovec.
*
* Returns: 0 - success.
* -EINVAL - checksum failure.
* -EFAULT - fault during copy.
*/
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
int hlen, struct msghdr *msg)
{
__wsum csum;
int chunk = skb->len - hlen;
if (!chunk)
return 0;
if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
return -EINVAL;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
goto fault;
} else {
csum = csum_partial(skb->data, hlen, skb->csum);
if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
chunk, &csum))
goto fault;
if (csum_fold(csum)) {
iov_iter_revert(&msg->msg_iter, chunk);
return -EINVAL;
}
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
}
return 0;
fault:
return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
* datagram_poll - generic datagram poll
* @file: file struct
* @sock: socket
* @wait: poll table
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
* is only ever holding data ready to receive.
*
* Note: when you _don't_ use this routine for this protocol,
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
unsigned int datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->sk_state == TCP_CLOSE)
mask |= POLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* writable? */
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
EXPORT_SYMBOL(datagram_poll);