alistair23-linux/net/netfilter/ipvs/ip_vs_nq.c
Simon Kirby c16526a7b9 ipvs: fix overflow on dest weight multiply
Schedulers such as lblc and lblcr require the weight to be as high as the
maximum number of active connections. In commit b552f7e3a9
("ipvs: unify the formula to estimate the overhead of processing
connections"), the consideration of inactconns and activeconns was cleaned
up to always count activeconns as 256 times more important than inactconns.
In cases where 3000 or more connections are expected, a weight of 3000
multiplied by an overhead of 256 * 3000 connections overflows the 32-bit
signed result used to determine whether rescheduling is required.
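
For scale, a hypothetical standalone demo (not kernel code; the variable
names are made up for illustration) of the product at issue:

#include <stdio.h>

int main(void)
{
	int weight = 3000;			/* dest->weight */
	int overhead = 256 * 3000;		/* activeconns counted 256x */
	/* unsigned multiply, then reinterpreted as signed, to show the wrap
	 * without invoking signed-overflow undefined behavior */
	int wrapped = (int)((unsigned)weight * (unsigned)overhead);
	long long wide = (long long)weight * overhead;

	/* 3000 * 768000 = 2,304,000,000, above INT_MAX (2,147,483,647) */
	printf("32-bit: %d\n", wrapped);	/* typically -1990967296 */
	printf("64-bit: %lld\n", wide);		/* 2304000000, as intended */
	return 0;
}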

On amd64, this merely changes the multiply and comparison instructions to
64-bit. On x86, a 64-bit result is already present from imull, so only
a few more comparison instructions are emitted.

Signed-off-by: Simon Kirby <sim@hostway.ca>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2013-09-18 14:38:53 -05:00


/*
* IPVS: Never Queue scheduling module
*
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Changes:
*
*/
/*
* The NQ algorithm adopts a two-speed model. When there is an idle server
* available, the job will be sent to the idle server, instead of waiting
* for a fast one. When there is no idle server available, the job will be
* sent to the server that minimizes its expected delay (the Shortest
* Expected Delay scheduling algorithm).
*
* See the following paper for more information:
* A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
* in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
* pages 986-994, 1988.
*
* Thanks must go to Marko Buuri <marko@buuri.name> for talking NQ over with me.
*
* The difference between NQ and SED is that NQ prefers an idle server
* when one is available, which can improve overall system utilization.
*
*/
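/*
* Example with hypothetical numbers: given server A (activeconns = 0,
* weight = 1) and server B (activeconns = 3, weight = 100), SED would
* compare (0 + 1)/1 = 1.0 against (3 + 1)/100 = 0.04 and send the job
* to the busy but powerful B; NQ dispatches to the idle A immediately.
*/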
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <net/ip_vs.h>
static inline int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
/*
* We only use the active connection number in the cost
* calculation here.
*/
return atomic_read(&dest->activeconns) + 1;
}
/*
* Never Queue scheduling
*/
static struct ip_vs_dest *
ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least = NULL;
int loh = 0, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
/*
* We calculate the load of each dest server as follows:
* (server expected overhead) / dest->weight
*
* Remember -- no floats in kernel mode!!!
* The comparison of h1*w2 > h2*w1 is equivalent to that of
* h1/w1 > h2/w2
* if every weight is larger than zero.
*
* The server with weight=0 is quiesced and will not receive any
* new connections.
*/
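/*
* Example with hypothetical numbers: loh = 5 with least->weight = 2
* (5/2 = 2.5) versus doh = 8 with dest->weight = 4 (8/4 = 2.0). The
* integer test 5 * 4 = 20 > 8 * 2 = 16 holds, so dest replaces least.
*/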
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
!atomic_read(&dest->weight))
continue;
doh = ip_vs_nq_dest_overhead(dest);
/* return the server directly if it is idle */
if (atomic_read(&dest->activeconns) == 0) {
least = dest;
loh = doh;
goto out;
}
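/* The (__s64) casts below keep the weight * overhead products in
* 64 bits, avoiding the 32-bit signed overflow this commit fixes. */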
if (!least ||
((__s64)loh * atomic_read(&dest->weight) >
(__s64)doh * atomic_read(&least->weight))) {
least = dest;
loh = doh;
}
}
if (!least) {
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
}
out:
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
return least;
}
static struct ip_vs_scheduler ip_vs_nq_scheduler =
{
.name = "nq",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
.schedule = ip_vs_nq_schedule,
};
static int __init ip_vs_nq_init(void)
{
return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
}
static void __exit ip_vs_nq_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
synchronize_rcu();
}
module_init(ip_vs_nq_init);
module_exit(ip_vs_nq_cleanup);
MODULE_LICENSE("GPL");
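
A minimal usage sketch, assuming the ipvsadm userspace tool is installed
and using a placeholder virtual service address:

    modprobe ip_vs_nq
    ipvsadm -A -t 192.0.2.1:80 -s nq

The -s nq flag selects this scheduler by the .name registered above; IPVS
can also request the module on demand the first time the "nq" name is used.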