Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

A small batch with accumulated updates in nf-next, mostly IPVS updates,
they are:

1) Add 64-bits stats counters to IPVS, from Julian Anastasov.

2) Move NETFILTER_XT_MATCH_ADDRTYPE out of NETFILTER_ADVANCED as docker
seem to require this, from Anton Blanchard.

3) Use boolean instead of numeric value in set_match_v*(), from
coccinelle via Fengguang Wu.

4) Allows rescheduling of new connections in IPVS when port reuse is
detected, from Marcelo Ricardo Leitner.

5) Add missing bits to support arptables extensions from nft_compat,
from Arturo Borrero.

Patrick is preparing a large batch to enhance the set infrastructure,
named expressions among other things, that should follow up soon after
this batch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-03-02 14:55:05 -05:00
commit 77f0379fa8
10 changed files with 319 additions and 145 deletions

View file

@ -22,6 +22,27 @@ backup_only - BOOLEAN
If set, disable the director function while the server is
in backup mode to avoid packet loops for DR/TUN methods.
conn_reuse_mode - INTEGER
1 - default
Controls how ipvs will deal with connections that are detected
port reuse. It is a bitmap, with the values being:
0: disable any special handling on port reuse. The new
connection will be delivered to the same real server that was
servicing the previous connection. This will effectively
disable expire_nodest_conn.
bit 1: enable rescheduling of new connections when it is safe.
That is, whenever expire_nodest_conn and for TCP sockets, when
the connection is in TIME_WAIT state (which is only possible if
you use NAT mode).
bit 2: it is bit 1 plus, for TCP connections, when connections
are in FIN_WAIT state, as this is the last state seen by load
balancer in Direct Routing mode. This bit helps on adding new
real servers to a very busy cluster.
conntrack - BOOLEAN
0 - disabled (default)
not 0 - enabled

View file

@ -365,15 +365,15 @@ struct ip_vs_seq {
/* counters per cpu */
struct ip_vs_counters {
__u32 conns; /* connections scheduled */
__u32 inpkts; /* incoming packets */
__u32 outpkts; /* outgoing packets */
__u64 conns; /* connections scheduled */
__u64 inpkts; /* incoming packets */
__u64 outpkts; /* outgoing packets */
__u64 inbytes; /* incoming bytes */
__u64 outbytes; /* outgoing bytes */
};
/* Stats per cpu */
struct ip_vs_cpu_stats {
struct ip_vs_counters ustats;
struct ip_vs_counters cnt;
struct u64_stats_sync syncp;
};
@ -383,23 +383,40 @@ struct ip_vs_estimator {
u64 last_inbytes;
u64 last_outbytes;
u32 last_conns;
u32 last_inpkts;
u32 last_outpkts;
u64 last_conns;
u64 last_inpkts;
u64 last_outpkts;
u32 cps;
u32 inpps;
u32 outpps;
u32 inbps;
u32 outbps;
u64 cps;
u64 inpps;
u64 outpps;
u64 inbps;
u64 outbps;
};
/*
* IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user
*/
struct ip_vs_kstats {
u64 conns; /* connections scheduled */
u64 inpkts; /* incoming packets */
u64 outpkts; /* outgoing packets */
u64 inbytes; /* incoming bytes */
u64 outbytes; /* outgoing bytes */
u64 cps; /* current connection rate */
u64 inpps; /* current in packet rate */
u64 outpps; /* current out packet rate */
u64 inbps; /* current in byte rate */
u64 outbps; /* current out byte rate */
};
struct ip_vs_stats {
struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_kstats kstats; /* kernel statistics */
struct ip_vs_estimator est; /* estimator */
struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */
struct ip_vs_stats_user ustats0; /* reset values */
struct ip_vs_kstats kstats0; /* reset values */
};
struct dst_entry;
@ -924,6 +941,7 @@ struct netns_ipvs {
int sysctl_nat_icmp_send;
int sysctl_pmtu_disc;
int sysctl_backup_only;
int sysctl_conn_reuse_mode;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
@ -1042,6 +1060,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
ipvs->sysctl_backup_only;
}
static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_conn_reuse_mode;
}
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@ -1109,6 +1132,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
return 0;
}
static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
return 1;
}
#endif
/* IPVS core functions
@ -1388,8 +1416,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
/* Various IPVS packet transmitters (from ip_vs_xmit.c) */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

View file

@ -358,6 +358,8 @@ enum {
IPVS_SVC_ATTR_PE_NAME, /* name of ct retriever */
IPVS_SVC_ATTR_STATS64, /* nested attribute for service stats */
__IPVS_SVC_ATTR_MAX,
};
@ -387,6 +389,8 @@ enum {
IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */
IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */
__IPVS_DEST_ATTR_MAX,
};
@ -410,7 +414,8 @@ enum {
/*
* Attributes used to describe service or destination entry statistics
*
* Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
* Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS,
* IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64.
*/
enum {
IPVS_STATS_ATTR_UNSPEC = 0,

View file

@ -951,7 +951,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
depends on NETFILTER_ADVANCED
default m if NETFILTER_ADVANCED=n
---help---
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...

View file

@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
s->ustats.conns++;
u64_stats_update_begin(&s->syncp);
s->cnt.conns++;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.conns++;
u64_stats_update_begin(&s->syncp);
s->cnt.conns++;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.conns++;
u64_stats_update_begin(&s->syncp);
s->cnt.conns++;
u64_stats_update_end(&s->syncp);
}
@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
}
}
static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
int conn_reuse_mode)
{
/* Controlled (FTP DATA or persistence)? */
if (cp->control)
return false;
switch (cp->protocol) {
case IPPROTO_TCP:
return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
((conn_reuse_mode & 2) &&
(cp->state == IP_VS_TCP_S_FIN_WAIT) &&
(cp->flags & IP_VS_CONN_F_NOOUTPUT));
case IPPROTO_SCTP:
return cp->state == IP_VS_SCTP_S_CLOSED;
default:
return false;
}
}
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@ -1585,6 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@ -1653,10 +1680,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
is_new_conn(skb, &iph)) {
ip_vs_conn_expire_now(cp);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs &&
is_new_conn(skb, &iph) && cp &&
((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight))) ||
unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
if (!atomic_read(&cp->n_control))
ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}

View file

@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
}
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
spin_lock_bh(&src->lock);
@ -746,6 +746,21 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
spin_unlock_bh(&src->lock);
}
static void
ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
{
dst->conns = (u32)src->conns;
dst->inpkts = (u32)src->inpkts;
dst->outpkts = (u32)src->outpkts;
dst->inbytes = src->inbytes;
dst->outbytes = src->outbytes;
dst->cps = (u32)src->cps;
dst->inpps = (u32)src->inpps;
dst->outpps = (u32)src->outpps;
dst->inbps = (u32)src->inbps;
dst->outbps = (u32)src->outbps;
}
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
@ -753,7 +768,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
/* get current counters as zero point, rates are zeroed */
#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "conn_reuse_mode",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats_user show;
struct ip_vs_kstats show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
show.inpkts, show.outpkts,
(unsigned long long) show.inbytes,
(unsigned long long) show.outbytes);
seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
(unsigned long long)show.conns,
(unsigned long long)show.inpkts,
(unsigned long long)show.outpkts,
(unsigned long long)show.inbytes,
(unsigned long long)show.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq, "%8X %8X %8X %16X %16X\n",
show.cps, show.inpps, show.outpps,
show.inbps, show.outbps);
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
(unsigned long long)show.cps,
(unsigned long long)show.inpps,
(unsigned long long)show.outpps,
(unsigned long long)show.inbps,
(unsigned long long)show.outbps);
return 0;
}
@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
struct ip_vs_stats_user rates;
struct ip_vs_kstats kstats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
__u64 inbytes, outbytes;
u64 conns, inpkts, outpkts, inbytes, outbytes;
do {
start = u64_stats_fetch_begin_irq(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
conns = u->cnt.conns;
inpkts = u->cnt.inpkts;
outpkts = u->cnt.outpkts;
inbytes = u->cnt.inbytes;
outbytes = u->cnt.outbytes;
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
u->ustats.outpkts, (__u64)inbytes,
(__u64)outbytes);
seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
i, (u64)conns, (u64)inpkts,
(u64)outpkts, (u64)inbytes,
(u64)outbytes);
}
spin_lock_bh(&tot_stats->lock);
ip_vs_copy_stats(&kstats, tot_stats);
seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
tot_stats->ustats.conns, tot_stats->ustats.inpkts,
tot_stats->ustats.outpkts,
(unsigned long long) tot_stats->ustats.inbytes,
(unsigned long long) tot_stats->ustats.outbytes);
seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
(unsigned long long)kstats.conns,
(unsigned long long)kstats.inpkts,
(unsigned long long)kstats.outpkts,
(unsigned long long)kstats.inbytes,
(unsigned long long)kstats.outbytes);
ip_vs_read_estimator(&rates, tot_stats);
spin_unlock_bh(&tot_stats->lock);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq, " %8X %8X %8X %16X %16X\n",
rates.cps,
rates.inpps,
rates.outpps,
rates.inbps,
rates.outbps);
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
kstats.cps,
kstats.inpps,
kstats.outpps,
kstats.inbps,
kstats.outbps);
return 0;
}
@ -2400,6 +2426,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
struct ip_vs_kstats kstats;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
dst->num_dests = src->num_dests;
ip_vs_copy_stats(&dst->stats, &src->stats);
ip_vs_copy_stats(&kstats, &src->stats);
ip_vs_export_stats_user(&dst->stats, &kstats);
}
static inline int
@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
struct ip_vs_kstats kstats;
memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
entry.persistconns = atomic_read(&dest->persistconns);
ip_vs_copy_stats(&entry.stats, &dest->stats);
ip_vs_copy_stats(&kstats, &dest->stats);
ip_vs_export_stats_user(&entry.stats, &kstats);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
ret = -EFAULT;
@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
struct ip_vs_stats *stats)
struct ip_vs_kstats *kstats)
{
struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
if (!nl_stats)
return -EMSGSIZE;
ip_vs_copy_stats(&ustats, stats);
if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
return 0;
nla_put_failure:
nla_nest_cancel(skb, nl_stats);
return -EMSGSIZE;
}
static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
struct ip_vs_kstats *kstats)
{
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
if (!nl_stats)
return -EMSGSIZE;
if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
struct ip_vs_kstats kstats;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
ip_vs_copy_stats(&kstats, &svc->stats);
if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
goto nla_put_failure;
if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
struct ip_vs_kstats kstats;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
atomic_read(&dest->persistconns)) ||
nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
ip_vs_copy_stats(&kstats, &dest->stats);
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
goto nla_put_failure;
if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
ipvs->sysctl_conn_reuse_mode = 1;
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);

View file

@ -45,17 +45,19 @@
NOTES.
* The stored value for average bps is scaled by 2^5, so that maximal
rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
* Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
* A lot code is taken from net/sched/estimator.c
* Netlink users can see 64-bit values but sockopt users are restricted
to 32-bit values for conns, packets, bps, cps and pps.
* A lot of code is taken from net/core/gen_estimator.c
*/
/*
* Make a summary from each cpu
*/
static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
__u64 inbytes, outbytes;
u64 conns, inpkts, outpkts, inbytes, outbytes;
if (add) {
sum->conns += s->ustats.conns;
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
inbytes = s->ustats.inbytes;
outbytes = s->ustats.outbytes;
conns = s->cnt.conns;
inpkts = s->cnt.inpkts;
outpkts = s->cnt.outpkts;
inbytes = s->cnt.inbytes;
outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
sum->conns += conns;
sum->inpkts += inpkts;
sum->outpkts += outpkts;
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
add = true;
sum->conns = s->ustats.conns;
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
sum->inbytes = s->ustats.inbytes;
sum->outbytes = s->ustats.outbytes;
sum->conns = s->cnt.conns;
sum->inpkts = s->cnt.inpkts;
sum->outpkts = s->cnt.outpkts;
sum->inbytes = s->cnt.inbytes;
sum->outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u32 n_conns;
u32 n_inpkts, n_outpkts;
u64 n_inbytes, n_outbytes;
u32 rate;
u64 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
n_outpkts = s->ustats.outpkts;
n_inbytes = s->ustats.inbytes;
n_outbytes = s->ustats.outbytes;
ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
/* scaled by 2^10, but divided 2 seconds */
rate = (n_conns - e->last_conns) << 9;
e->last_conns = n_conns;
e->cps += ((long)rate - (long)e->cps) >> 2;
rate = (s->kstats.conns - e->last_conns) << 9;
e->last_conns = s->kstats.conns;
e->cps += ((s64)rate - (s64)e->cps) >> 2;
rate = (n_inpkts - e->last_inpkts) << 9;
e->last_inpkts = n_inpkts;
e->inpps += ((long)rate - (long)e->inpps) >> 2;
rate = (s->kstats.inpkts - e->last_inpkts) << 9;
e->last_inpkts = s->kstats.inpkts;
e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
rate = (n_outpkts - e->last_outpkts) << 9;
e->last_outpkts = n_outpkts;
e->outpps += ((long)rate - (long)e->outpps) >> 2;
rate = (s->kstats.outpkts - e->last_outpkts) << 9;
e->last_outpkts = s->kstats.outpkts;
e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
rate = (n_inbytes - e->last_inbytes) << 4;
e->last_inbytes = n_inbytes;
e->inbps += ((long)rate - (long)e->inbps) >> 2;
/* scaled by 2^5, but divided 2 seconds */
rate = (s->kstats.inbytes - e->last_inbytes) << 4;
e->last_inbytes = s->kstats.inbytes;
e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
rate = (n_outbytes - e->last_outbytes) << 4;
e->last_outbytes = n_outbytes;
e->outbps += ((long)rate - (long)e->outbps) >> 2;
rate = (s->kstats.outbytes - e->last_outbytes) << 4;
e->last_outbytes = s->kstats.outbytes;
e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
struct ip_vs_stats_user *u = &stats->ustats;
struct ip_vs_kstats *k = &stats->kstats;
/* reset counters, caller must hold the stats->lock lock */
est->last_inbytes = u->inbytes;
est->last_outbytes = u->outbytes;
est->last_conns = u->conns;
est->last_inpkts = u->inpkts;
est->last_outpkts = u->outpkts;
est->last_inbytes = k->inbytes;
est->last_outbytes = k->outbytes;
est->last_conns = k->conns;
est->last_inpkts = k->inpkts;
est->last_outpkts = k->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
}
/* Get decoded rates */
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
struct ip_vs_stats *stats)
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;

View file

@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE))
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
else
if (cp && ((cp->dport != dport) ||
!ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
if (!(flags & IP_VS_CONN_F_INACTIVE)) {
ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
} else {
/* This is the expiration message for the
* connection that was already replaced, so we
* just ignore it.
*/
__ip_vs_conn_put(cp);
kfree(param->pe_data);
return;
}
}
} else {
cp = ip_vs_ct_in_get(param);
}
if (cp) {
/* Free pe_data */

View file

@ -20,6 +20,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
static int nft_compat_chain_validate_dependency(const char *tablename,
@ -42,6 +43,7 @@ union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
struct ebt_entry ebt;
struct arpt_entry arp;
};
static inline void
@ -140,6 +142,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->ebt.ethproto = proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
case NFPROTO_ARP:
break;
}
par->entryinfo = entry;
par->target = target;
@ -351,6 +355,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->ebt.ethproto = proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
case NFPROTO_ARP:
break;
}
par->entryinfo = entry;
par->match = match;
@ -537,6 +543,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
case NFPROTO_BRIDGE:
fmt = "ebt_%s";
break;
case NFPROTO_ARP:
fmt = "arpt_%s";
break;
default:
pr_err("nft_compat: unsupported protocol %d\n",
nfmsg->nfgen_family);

View file

@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter0(opt.ext.packets, &info->packets))
return 0;
return false;
return match_counter0(opt.ext.bytes, &info->bytes);
}
@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter(opt.ext.packets, &info->packets))
return 0;
return false;
return match_counter(opt.ext.bytes, &info->bytes);
}