remarkable-linux/net/tipc/server.c
Linus Torvalds b2fe5fa686 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Significantly shrink the core networking routing structures. Result
    of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf

 2) Add netdevsim driver for testing various offloads, from Jakub
    Kicinski.

 3) Support cross-chip FDB operations in DSA, from Vivien Didelot.

 4) Add a 2nd listener hash table for TCP, similar to what was done for
    UDP. From Martin KaFai Lau.

 5) Add eBPF based queue selection to tun, from Jason Wang.

 6) Lockless qdisc support, from John Fastabend.

 7) SCTP stream interleave support, from Xin Long.

 8) Smoother TCP receive autotuning, from Eric Dumazet.

 9) Lots of erspan tunneling enhancements, from William Tu.

10) Add true function call support to BPF, from Alexei Starovoitov.

11) Add explicit support for GRO HW offloading, from Michael Chan.

12) Support extack generation in more netlink subsystems. From Alexander
    Aring, Quentin Monnet, and Jakub Kicinski.

13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From
    Russell King.

14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso.

15) Many improvements and simplifications to the NFP driver bpf JIT,
    from Jakub Kicinski.

16) Support for ipv6 non-equal cost multipath routing, from Ido
    Schimmel.

17) Add resource abstraction to devlink, from Arkadi Sharshevsky.

18) Packet scheduler classifier shared filter block support, from Jiri
    Pirko.

19) Avoid locking in act_csum, from Davide Caratti.

20) devinet_ioctl() simplifications from Al Viro.

21) More TCP bpf improvements from Lawrence Brakmo.

22) Add support for onlink ipv6 route flag, similar to ipv4, from David
    Ahern.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits)
  tls: Add support for encryption using async offload accelerator
  ip6mr: fix stale iterator
  net/sched: kconfig: Remove blank help texts
  openvswitch: meter: Use 64-bit arithmetic instead of 32-bit
  tcp_nv: fix potential integer overflow in tcpnv_acked
  r8169: fix RTL8168EP take too long to complete driver initialization.
  qmi_wwan: Add support for Quectel EP06
  rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK
  ipmr: Fix ptrdiff_t print formatting
  ibmvnic: Wait for device response when changing MAC
  qlcnic: fix deadlock bug
  tcp: release sk_frag.page in tcp_disconnect
  ipv4: Get the address of interface correctly.
  net_sched: gen_estimator: fix lockdep splat
  net: macb: Handle HRESP error
  net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring
  ipv6: addrconf: break critical section in addrconf_verify_rtnl()
  ipv6: change route cache aging logic
  i40e/i40evf: Update DESC_NEEDED value to reflect larger value
  bnxt_en: cleanup DIM work on device shutdown
  ...
2018-01-31 14:31:10 -08:00

711 lines
17 KiB
C

/*
* net/tipc/server.c: TIPC server infrastructure
*
* Copyright (c) 2012-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "server.h"
#include "core.h"
#include "socket.h"
#include "addr.h"
#include "msg.h"
#include <net/sock.h>
#include <linux/module.h>
/* Number of messages the send/receive work handlers process before they
 * call cond_resched() and restart their count.
 */
#define MAX_SEND_MSG_COUNT 25
#define MAX_RECV_MSG_COUNT 25
/* Bit numbers within tipc_conn::flags; they are passed to set_bit(),
 * test_bit() and test_and_clear_bit(), so they are bit indices, not masks.
 */
#define CF_CONNECTED 1
#define CF_SERVER 2
/* Fetch the tipc_conn that tipc_register_callbacks() stored in a sock's
 * sk_user_data; evaluates to NULL once tipc_close_conn() has cleared it.
 */
#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
/**
 * struct tipc_conn - TIPC connection structure
 * @kref: reference counter to connection object
 * @conid: connection identifier (the id allocated in the server's conn_idr)
 * @sock: socket handler associated with connection; NULL for connections
 *        created by tipc_topsrv_kern_subscr()
 * @flags: indicates connection state (CF_CONNECTED and CF_SERVER bits)
 * @server: pointer to connected server
 * @rwork: receive work item
 * @rx_action: what to do when connection socket is active
 * @usr_data: user-specified field
 * @outqueue: list of outbound messages (struct outqueue_entry) not yet sent
 * @outqueue_lock: control access to the outqueue
 * @swork: send work item
 */
struct tipc_conn {
struct kref kref;
int conid;
struct socket *sock;
unsigned long flags;
struct tipc_server *server;
struct work_struct rwork;
int (*rx_action) (struct tipc_conn *con);
void *usr_data;
struct list_head outqueue;
spinlock_t outqueue_lock;
struct work_struct swork;
};
/* An entry waiting to be sent.
 *
 * @list: link in tipc_conn::outqueue
 * @iov:  private copy of the payload (made with kmemdup() in
 *        tipc_alloc_entry()) and its length
 * @dest: destination address; only filled in when the sender supplies one,
 *        and only read by tipc_send_to_sock() for SOCK_DGRAM/SOCK_RDM servers
 */
struct outqueue_entry {
struct list_head list;
struct kvec iov;
struct sockaddr_tipc dest;
};
/* Forward declarations: the two work handlers are installed by
 * tipc_alloc_conn() and the queue cleaner is called from
 * tipc_conn_kref_release(), all of which appear earlier in the file than
 * the definitions.
 */
static void tipc_recv_work(struct work_struct *work);
static void tipc_send_work(struct work_struct *work);
static void tipc_clean_outqueues(struct tipc_conn *con);
/* kref release handler: destroy a connection once its last reference is
 * dropped.
 *
 * Releases the socket (if the connection has one), removes the connection
 * from the server's idr, frees any messages still queued for sending and
 * finally frees the connection object itself.
 */
static void tipc_conn_kref_release(struct kref *kref)
{
	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
	struct tipc_server *srv = con->server;

	if (con->sock) {
		struct sockaddr_tipc *addr = srv->saddr;
		struct socket *csock = con->sock;

		/* The listening socket had its module references dropped
		 * when it was created; take them back before releasing it.
		 */
		if (test_bit(CF_SERVER, &con->flags)) {
			__module_get(csock->ops->owner);
			__module_get(csock->sk->sk_prot_creator->owner);
		}

		/* NOTE(review): binding with a negated scope presumably
		 * withdraws the server's published name - confirm against
		 * the TIPC bind implementation.
		 */
		addr->scope = -TIPC_NODE_SCOPE;
		kernel_bind(csock, (struct sockaddr *)addr, sizeof(*addr));
		sock_release(csock);
		con->sock = NULL;
	}

	spin_lock_bh(&srv->idr_lock);
	idr_remove(&srv->conn_idr, con->conid);
	srv->idr_in_use--;
	spin_unlock_bh(&srv->idr_lock);

	tipc_clean_outqueues(con);
	kfree(con);
}
/* Drop one reference on @con; tipc_conn_kref_release() runs when the
 * reference count reaches zero.
 */
static void conn_put(struct tipc_conn *con)
{
kref_put(&con->kref, tipc_conn_kref_release);
}
/* Take an additional reference on @con; pair with conn_put(). */
static void conn_get(struct tipc_conn *con)
{
kref_get(&con->kref);
}
/* Find the connection registered under @conid in server @s.
 *
 * Returns the connection with an extra reference held (release it with
 * conn_put()), or NULL if no such id exists, the connection is no longer
 * flagged CF_CONNECTED, or its reference count has already dropped to zero.
 */
static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
{
	struct tipc_conn *found;

	spin_lock_bh(&s->idr_lock);
	found = idr_find(&s->conn_idr, conid);
	/* Only hand out connections that are both live and still referenced */
	if (found && !(test_bit(CF_CONNECTED, &found->flags) &&
		       kref_get_unless_zero(&found->kref)))
		found = NULL;
	spin_unlock_bh(&s->idr_lock);

	return found;
}
/* sk_data_ready callback: schedule the connection's receive work.
 *
 * Does nothing when the sock has no connection attached or the connection
 * is no longer flagged CF_CONNECTED.
 */
static void sock_data_ready(struct sock *sk)
{
	struct tipc_conn *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (!con || !test_bit(CF_CONNECTED, &con->flags))
		goto unlock;

	/* The queued work owns this reference; give it back right away
	 * if queue_work() did not queue anything.
	 */
	conn_get(con);
	if (!queue_work(con->server->rcv_wq, &con->rwork))
		conn_put(con);
unlock:
	read_unlock_bh(&sk->sk_callback_lock);
}
/* sk_write_space callback: schedule the connection's send work.
 *
 * Does nothing when the sock has no connection attached or the connection
 * is no longer flagged CF_CONNECTED.
 */
static void sock_write_space(struct sock *sk)
{
	struct tipc_conn *con;

	read_lock_bh(&sk->sk_callback_lock);
	con = sock2con(sk);
	if (!con || !test_bit(CF_CONNECTED, &con->flags))
		goto unlock;

	/* The queued work owns this reference; give it back right away
	 * if queue_work() did not queue anything.
	 */
	conn_get(con);
	if (!queue_work(con->server->send_wq, &con->swork))
		conn_put(con);
unlock:
	read_unlock_bh(&sk->sk_callback_lock);
}
/* Tie @sock and @con together: the connection records its socket, and the
 * underlying sock gets the connection as user data plus this file's
 * data-ready / write-space callbacks.  All updates happen under the sock's
 * callback write lock.
 */
static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
{
	struct sock *sk = sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	con->sock = sock;
	sk->sk_user_data = con;
	sk->sk_data_ready = sock_data_ready;
	sk->sk_write_space = sock_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}
/* Disconnect @con and drop one reference on it.
 *
 * Only the caller that actually clears CF_CONNECTED performs the teardown,
 * so concurrent calls from the sending and receiving threads are harmless.
 * Dereferences con->sock, so @con must have a socket.
 */
static void tipc_close_conn(struct tipc_conn *con)
{
	struct sock *sk = con->sock->sk;
	bool was_connected;

	write_lock_bh(&sk->sk_callback_lock);
	was_connected = test_and_clear_bit(CF_CONNECTED, &con->flags);
	if (was_connected) {
		/* Detach from the sock so the callbacks stop finding us */
		sk->sk_user_data = NULL;
		/* NOTE(review): conid 0 is skipped - presumably that id
		 * belongs to the listening connection; confirm.
		 */
		if (con->conid)
			con->server->tipc_conn_release(con->conid,
						       con->usr_data);
	}
	write_unlock_bh(&sk->sk_callback_lock);

	if (was_connected) {
		/* Don't flush pending works, -just let them expire */
		kernel_sock_shutdown(con->sock, SHUT_RDWR);
		conn_put(con);
	}
}
/* Allocate a connection object for server @s and register it in the
 * server's idr.
 *
 * The new connection starts with one reference, an empty out-queue, both
 * work items initialised and the CF_CONNECTED flag set.
 *
 * Returns the connection, or ERR_PTR(-ENOMEM) if either the object or an
 * id could not be allocated.
 */
static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
{
	struct tipc_conn *con;
	int id;

	con = kzalloc(sizeof(*con), GFP_ATOMIC);
	if (!con)
		return ERR_PTR(-ENOMEM);

	kref_init(&con->kref);
	spin_lock_init(&con->outqueue_lock);
	INIT_LIST_HEAD(&con->outqueue);
	INIT_WORK(&con->rwork, tipc_recv_work);
	INIT_WORK(&con->swork, tipc_send_work);

	spin_lock_bh(&s->idr_lock);
	id = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
	if (id >= 0) {
		con->conid = id;
		s->idr_in_use++;
	}
	spin_unlock_bh(&s->idr_lock);

	if (id < 0) {
		kfree(con);
		return ERR_PTR(-ENOMEM);
	}

	set_bit(CF_CONNECTED, &con->flags);
	con->server = s;

	return con;
}
/* Read one message from @con's socket and pass it to the server's receive
 * callback.
 *
 * A buffer is taken from the server's receive cache, filled with a
 * non-blocking sock_recvmsg() and, provided the connection is still flagged
 * CF_CONNECTED, handed to s->tipc_conn_recvmsg().  The buffer is always
 * returned to the cache before this function returns.
 *
 * Any failure to receive other than -EWOULDBLOCK - including end-of-file,
 * i.e. a zero-length read - closes the connection.
 *
 * Returns the result of the receive callback (a negative result also
 * terminates the connection), the number of bytes read if the connection
 * was found disconnected after the read, or a negative errno if nothing
 * was received (-EAGAIN for end-of-file).
 */
static int tipc_receive_from_sock(struct tipc_conn *con)
{
	struct tipc_server *s = con->server;
	struct sock *sk = con->sock->sk;
	struct sockaddr_tipc addr;
	struct msghdr msg = {};
	struct kvec iov;
	void *buf;
	int ret;

	buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
	if (!buf) {
		ret = -ENOMEM;
		goto out_close;
	}

	iov.iov_base = buf;
	iov.iov_len = s->max_rcvbuf_size;
	msg.msg_name = &addr;
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
	if (ret <= 0) {
		kmem_cache_free(s->rcvbuf_cache, buf);
		goto out_close;
	}

	/* Hold the callback lock so tipc_close_conn() cannot release the
	 * user data while the callback is using it.
	 */
	read_lock_bh(&sk->sk_callback_lock);
	if (test_bit(CF_CONNECTED, &con->flags))
		ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
					   &addr, con->usr_data, buf, ret);
	read_unlock_bh(&sk->sk_callback_lock);
	kmem_cache_free(s->rcvbuf_cache, buf);
	if (ret < 0)
		tipc_conn_terminate(s, con->conid);
	return ret;

out_close:
	if (ret != -EWOULDBLOCK)
		tipc_close_conn(con);

	/* Don't return success if we really got EOF.  This must not be an
	 * "else" of the test above: zero is itself different from
	 * -EWOULDBLOCK, so an else-branch could never be reached.
	 */
	if (ret == 0)
		ret = -EAGAIN;

	return ret;
}
static int tipc_accept_from_sock(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
struct socket *sock = con->sock;
struct socket *newsock;
struct tipc_conn *newcon;
int ret;
ret = kernel_accept(sock, &newsock, O_NONBLOCK);
if (ret < 0)
return ret;
newcon = tipc_alloc_conn(con->server);
if (IS_ERR(newcon)) {
ret = PTR_ERR(newcon);
sock_release(newsock);
return ret;
}
newcon->rx_action = tipc_receive_from_sock;
tipc_register_callbacks(newsock, newcon);
/* Notify that new connection is incoming */
newcon->usr_data = s->tipc_conn_new(newcon->conid);
if (!newcon->usr_data) {
sock_release(newsock);
conn_put(newcon);
return -ENOMEM;
}
/* Wake up receive process in case of 'SYN+' message */
newsock->sk->sk_data_ready(newsock->sk);
return ret;
}
/* Create, configure and bind the server socket for the listening
 * connection @con.
 *
 * Sets the server's importance level on the socket, binds it to the
 * server address and selects @con's receive action from the server's
 * socket type: accepting for stream/seqpacket (which are also put into
 * listening state), direct receive for dgram/rdm.  On success @con is
 * flagged CF_SERVER.
 *
 * Returns the socket, or NULL on any failure (the socket, if it was
 * created, is shut down and released).
 */
static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
{
	struct tipc_server *srv = con->server;
	struct socket *lsock = NULL;

	if (sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock) < 0)
		return NULL;

	if (kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
			      (char *)&srv->imp, sizeof(srv->imp)) < 0)
		goto release;

	if (kernel_bind(lsock, (struct sockaddr *)srv->saddr,
			sizeof(*srv->saddr)) < 0)
		goto release;

	switch (srv->type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		con->rx_action = tipc_accept_from_sock;
		if (kernel_listen(lsock, 0) < 0)
			goto release;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		con->rx_action = tipc_receive_from_sock;
		break;
	default:
		pr_err("Unknown socket type %d\n", srv->type);
		goto release;
	}

	/* The listening socket is owned and created by the TIPC module
	 * itself.  sock_create_kern() took two module references - one on
	 * the owner of the socket's proto_ops, one on the owner of its
	 * proto - which would keep the module's reference count above zero
	 * and make "rmmod" impossible for as long as the server runs.
	 * Drop both here; they are taken again right before the socket is
	 * closed.
	 */
	module_put(lsock->ops->owner);
	module_put(lsock->sk->sk_prot_creator->owner);
	set_bit(CF_SERVER, &con->flags);

	return lsock;

release:
	kernel_sock_shutdown(lsock, SHUT_RDWR);
	sock_release(lsock);
	return NULL;
}
/* Set up the server's listening connection: allocate the connection,
 * create its socket and register the socket callbacks.
 *
 * Returns 0 on success, the tipc_alloc_conn() error if no connection could
 * be allocated, or -EINVAL if the socket could not be created.
 */
static int tipc_open_listening_sock(struct tipc_server *s)
{
	struct tipc_conn *lcon = tipc_alloc_conn(s);
	struct socket *lsock;

	if (IS_ERR(lcon))
		return PTR_ERR(lcon);

	lsock = tipc_create_listen_sock(lcon);
	if (lsock) {
		tipc_register_callbacks(lsock, lcon);
		return 0;
	}

	/* No socket: undo what tipc_alloc_conn() did, by hand */
	idr_remove(&s->conn_idr, lcon->conid);
	s->idr_in_use--;
	kfree(lcon);
	return -EINVAL;
}
/* Build an out-queue entry holding a private copy of @len bytes at @data.
 *
 * Only the iov member is initialised.  Returns the entry, or NULL if either
 * allocation fails.
 */
static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
{
	struct outqueue_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);

	if (!e)
		return NULL;

	e->iov.iov_len = len;
	e->iov.iov_base = kmemdup(data, len, GFP_ATOMIC);
	if (e->iov.iov_base)
		return e;

	/* Payload copy failed: don't leak the entry itself */
	kfree(e);
	return NULL;
}
/* Free an out-queue entry together with the payload copy it owns.  The
 * caller must already have unlinked @e from any list.
 */
static void tipc_free_entry(struct outqueue_entry *e)
{
kfree(e->iov.iov_base);
kfree(e);
}
static void tipc_clean_outqueues(struct tipc_conn *con)
{
struct outqueue_entry *e, *safe;
spin_lock_bh(&con->outqueue_lock);
list_for_each_entry_safe(e, safe, &con->outqueue, list) {
list_del(&e->list);
tipc_free_entry(e);
}
spin_unlock_bh(&con->outqueue_lock);
}
/* Queue a message for asynchronous transmission on connection @conid.
 *
 * @s:     server owning the connection
 * @conid: connection identifier
 * @addr:  destination address to store with the message, or NULL
 * @data:  payload; it is copied, so the caller keeps ownership
 * @len:   payload length in bytes
 *
 * Returns 0 if the message was queued (or the connection turned out to be
 * disconnected, in which case the message is dropped), -EINVAL if no
 * connected connection has that id, or -ENOMEM if the message could not be
 * copied.
 */
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
		      struct sockaddr_tipc *addr, void *data, size_t len)
{
	struct outqueue_entry *e;
	struct tipc_conn *con;
	int rc = 0;

	con = tipc_conn_lookup(s, conid);
	if (!con)
		return -EINVAL;

	if (!test_bit(CF_CONNECTED, &con->flags))
		goto drop_ref;

	e = tipc_alloc_entry(data, len);
	if (!e) {
		rc = -ENOMEM;
		goto drop_ref;
	}

	if (addr)
		memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));

	spin_lock_bh(&con->outqueue_lock);
	list_add_tail(&e->list, &con->outqueue);
	spin_unlock_bh(&con->outqueue_lock);

	/* When the work is queued it inherits the lookup reference and
	 * tipc_send_work() drops it; otherwise drop it below.
	 */
	if (queue_work(s->send_wq, &con->swork))
		return 0;

drop_ref:
	conn_put(con);
	return rc;
}
/* Close the connection registered as @conid in server @s, if it exists
 * and is still connected.
 */
void tipc_conn_terminate(struct tipc_server *s, int conid)
{
	struct tipc_conn *con = tipc_conn_lookup(s, conid);

	if (!con)
		return;

	tipc_close_conn(con);
	/* Give back the reference taken by the lookup */
	conn_put(con);
}
/* Create a subscription on behalf of a kernel user.
 *
 * A socket-less connection is allocated on the topology server of @net and
 * a subscription request for {@type, @lower, @upper} with @filter and no
 * timeout is fed straight into the server's receive hook.  @port is stored
 * in the first four bytes of the request's user handle.
 *
 * The connection id is written to *@conid as soon as the connection
 * exists, i.e. also when the subsequent tipc_conn_new() hook fails.
 *
 * Returns true on success, false if the connection or its user data could
 * not be allocated.
 */
bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
			     u32 upper, u32 filter, int *conid)
{
	struct tipc_subscriber *subscriber;
	struct tipc_server *srv;
	struct tipc_subscr req;
	struct tipc_conn *con;

	con = tipc_alloc_conn(tipc_topsrv(net));
	if (IS_ERR(con))
		return false;

	srv = con->server;
	*conid = con->conid;

	subscriber = srv->tipc_conn_new(*conid);
	if (!subscriber) {
		conn_put(con);
		return false;
	}
	con->sock = NULL;
	con->usr_data = subscriber;

	req.seq.type = type;
	req.seq.lower = lower;
	req.seq.upper = upper;
	req.timeout = TIPC_WAIT_FOREVER;
	req.filter = filter;
	*(u32 *)&req.usr_handle = port;

	srv->tipc_conn_recvmsg(net, *conid, NULL, subscriber, &req,
			       sizeof(req));
	return true;
}
/* Tear down the kernel subscription connection @conid on the topology
 * server of @net.  Does nothing if no connected connection has that id.
 */
void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
{
	struct tipc_conn *con = tipc_conn_lookup(tipc_topsrv(net), conid);

	if (!con)
		return;

	test_and_clear_bit(CF_CONNECTED, &con->flags);
	if (con->conid)
		con->server->tipc_conn_release(con->conid, con->usr_data);

	/* One put for the reference taken by the lookup above ... */
	conn_put(con);
	/* ... and one for the reference the connection was created with */
	conn_put(con);
}
static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
{
u32 port = *(u32 *)&evt->s.usr_handle;
u32 self = tipc_own_addr(net);
struct sk_buff_head evtq;
struct sk_buff *skb;
skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
self, self, port, port, 0);
if (!skb)
return;
msg_set_dest_droppable(buf_msg(skb), true);
memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
skb_queue_head_init(&evtq);
__skb_queue_tail(&evtq, skb);
tipc_sk_rcv(net, &evtq);
}
/* Transmit the messages queued on @con, oldest first, for as long as the
 * connection stays flagged CF_CONNECTED.
 *
 * Connections with a socket send through kernel_sendmsg() in non-blocking
 * mode; connections without one treat each payload as a struct tipc_event
 * and deliver it with tipc_send_kern_top_evt().
 *
 * An entry is removed from the queue and freed only after it has been
 * sent.  If the socket cannot take the message now (-EWOULDBLOCK or a
 * zero return) the function returns and leaves the entry at the head of
 * the queue; on any other send error the connection is closed.
 */
static void tipc_send_to_sock(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
struct outqueue_entry *e;
struct tipc_event *evt;
struct msghdr msg;
int count = 0;
int ret;
spin_lock_bh(&con->outqueue_lock);
while (test_bit(CF_CONNECTED, &con->flags)) {
/* Peek at the head; list_entry() of the list head itself means the
 * queue is empty.
 */
e = list_entry(con->outqueue.next, struct outqueue_entry, list);
if ((struct list_head *) e == &con->outqueue)
break;
/* The entry stays on the queue, but the lock is dropped while sending */
spin_unlock_bh(&con->outqueue_lock);
if (con->sock) {
memset(&msg, 0, sizeof(msg));
msg.msg_flags = MSG_DONTWAIT;
/* Only dgram/rdm servers pass the stored destination address */
if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
msg.msg_name = &e->dest;
msg.msg_namelen = sizeof(struct sockaddr_tipc);
}
ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
e->iov.iov_len);
if (ret == -EWOULDBLOCK || ret == 0) {
/* Nothing was sent: leave the entry queued and give up for now.
 * The queue lock is not held at this point.
 */
cond_resched();
goto out;
} else if (ret < 0) {
goto send_err;
}
} else {
/* Socket-less connection: the payload is an event for a kernel user */
evt = e->iov.iov_base;
tipc_send_kern_top_evt(s->net, evt);
}
/* Don't starve users filling buffers */
if (++count >= MAX_SEND_MSG_COUNT) {
cond_resched();
count = 0;
}
/* Sent: retake the lock, then unlink and free the entry */
spin_lock_bh(&con->outqueue_lock);
list_del(&e->list);
tipc_free_entry(e);
}
spin_unlock_bh(&con->outqueue_lock);
out:
return;
send_err:
/* Reached with the queue lock released */
tipc_close_conn(con);
}
/* Receive work handler: keep invoking the connection's rx_action until it
 * returns non-zero or the connection is no longer flagged CF_CONNECTED,
 * then drop the reference that was taken when the work was queued.
 */
static void tipc_recv_work(struct work_struct *work)
{
	struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
	int handled = 0;

	for (;;) {
		if (!test_bit(CF_CONNECTED, &con->flags))
			break;
		if (con->rx_action(con) != 0)
			break;

		/* Don't flood Rx machine */
		if (++handled < MAX_RECV_MSG_COUNT)
			continue;
		cond_resched();
		handled = 0;
	}
	conn_put(con);
}
/* Send work handler: flush the connection's out-queue if the connection
 * is still flagged CF_CONNECTED, then drop the reference that was held
 * for the queued work.
 */
static void tipc_send_work(struct work_struct *work)
{
struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
if (test_bit(CF_CONNECTED, &con->flags))
tipc_send_to_sock(con);
conn_put(con);
}
/* Destroy the server's receive and send workqueues. */
static void tipc_work_stop(struct tipc_server *s)
{
destroy_workqueue(s->rcv_wq);
destroy_workqueue(s->send_wq);
}
/* Create the server's ordered receive and send workqueues.
 *
 * Returns 0 on success or -ENOMEM if either queue cannot be allocated; no
 * queue is left allocated on failure.
 */
static int tipc_work_start(struct tipc_server *s)
{
	s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
	if (!s->rcv_wq) {
		pr_err("can't start tipc receive workqueue\n");
		return -ENOMEM;
	}

	s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
	if (s->send_wq)
		return 0;

	pr_err("can't start tipc send workqueue\n");
	destroy_workqueue(s->rcv_wq);
	return -ENOMEM;
}
/* Start server @s: initialise its connection idr, create the receive
 * buffer cache and the workqueues, and open the listening socket.
 *
 * Returns 0 on success or a negative errno; on failure everything that had
 * been set up is torn down again.
 */
int tipc_server_start(struct tipc_server *s)
{
	int ret;

	spin_lock_init(&s->idr_lock);
	idr_init(&s->conn_idr);
	s->idr_in_use = 0;

	s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
					    0, SLAB_HWCACHE_ALIGN, NULL);
	if (!s->rcvbuf_cache)
		return -ENOMEM;

	ret = tipc_work_start(s);
	if (ret < 0)
		goto free_cache;

	ret = tipc_open_listening_sock(s);
	if (ret < 0)
		goto stop_work;

	return ret;

stop_work:
	tipc_work_stop(s);
free_cache:
	kmem_cache_destroy(s->rcvbuf_cache);
	return ret;
}
/* Stop server @s: close every registered connection, then destroy the
 * workqueues, the receive buffer cache and the idr.
 *
 * Ids are probed in ascending order until the server's in-use counter
 * reaches zero; the counter is decremented when a connection is finally
 * released.
 */
void tipc_server_stop(struct tipc_server *s)
{
	struct tipc_conn *con;
	int id = 0;

	spin_lock_bh(&s->idr_lock);
	while (s->idr_in_use) {
		con = idr_find(&s->conn_idr, id++);
		if (!con)
			continue;

		/* tipc_close_conn() must run without the idr lock held */
		spin_unlock_bh(&s->idr_lock);
		tipc_close_conn(con);
		spin_lock_bh(&s->idr_lock);
	}
	spin_unlock_bh(&s->idr_lock);

	tipc_work_stop(s);
	kmem_cache_destroy(s->rcvbuf_cache);
	idr_destroy(&s->conn_idr);
}