Merge branch 'TC-Introduce-qevents'

Petr Machata says:

====================
TC: Introduce qevents

The Spectrum hardware allows execution of one of several actions as a
result of queue management decisions: tail-dropping or early-dropping a
packet, marking it, or the packet crossing a configured latency threshold
or buffer size. Packets subject to such a decision can be mirrored,
trapped, or sampled.

Modeling the action to be taken as simply a TC action is very attractive,
but it is not obvious where to put these actions. At least with ECN marking
one could imagine a tree of qdiscs and classifiers that effectively
accomplishes this task, albeit in an impractically complex manner. But
there is just no way to match on dropped-ness of a packet, let alone
dropped-ness due to a particular reason.

To allow configuring user-defined actions as a result of the inner
workings of a qdisc, this patch set introduces the concept of qevents.
These are attach points for TC blocks, where filters can be put that are
executed as a packet hits well-defined points in the qdisc algorithms.
The attached blocks can be shared, in a manner similar to clsact ingress
and egress blocks, and arbitrary classifiers with arbitrary actions can
be put on them.

For example, the following (abridged) commands attach TC block 10 to the
early_drop qevent of a RED qdisc and put a matchall filter on that block
which mirrors early-dropped packets to eth1:

	red limit 500K avpkt 1K qevent early_drop block 10
	matchall action mirred egress mirror dev eth1

The central patch #2 introduces several helpers to allow easy and uniform
addition of qevents to qdiscs: initialization, destruction, qevent block
number change validation, and qevent handling, i.e. dispatch of the filters
attached to the block bound to a qevent.
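
Abridged from the sch_red.c changes further below, a qdisc uses these
helpers roughly as follows for one qevent (error handling trimmed):

	/* In the init path: bind the block given by the
	 * TCA_RED_EARLY_DROP_BLOCK attribute, if any, to the qevent.
	 */
	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);

	/* In the change path: an already-bound block number may not change. */
	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);

	/* In the dump path: report the bound block number to user space. */
	err = tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop);

	/* In the destroy path: unbind the block. */
	tcf_qevent_destroy(&q->qe_early_drop, sch);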

Patch #1 adds a root_lock argument to the qdisc enqueue op. The problem it
tackles is that if a qevent filter pushes packets into the same qdisc tree
that holds the qevent in the first place, the attempt to take the qdisc
root lock a second time leads to a deadlock. To solve the issue, the
qevent handler needs to unlock and relock the root lock around the filter
processing. Passing root_lock around makes it possible to take the lock
where it is needed, and visibly so, such that it is obvious the lock will
be used when invoking a qevent.
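
Concretely, the enqueue op and the qdisc_enqueue() wrapper grow the extra
argument (see the include/net/sch_generic.h hunk below), and the handler
drops and re-takes the lock around filter execution; abridged from
tcf_qevent_handle() in patch #2:

	/* The root lock is released while the qevent's filters run, so a
	 * filter that re-enqueues into the same qdisc tree can take it
	 * again without deadlocking.
	 */
	if (root_lock)
		spin_unlock(root_lock);

	switch (tcf_classify(skb, fl, &cl_res, false)) {
	/* ... TC_ACT_SHOT / STOLEN / REDIRECT consume the skb ... */
	}

	if (root_lock)
		spin_lock(root_lock);
	return skb;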

The following two patches, #3 and #4, then add two qevents to the RED
qdisc: the "early_drop" qevent fires when a packet is early-dropped, and
the "mark" qevent when it is ECN-marked.
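
Abridged from the sch_red.c hunks below, the qevents are invoked at the
corresponding points of red_enqueue(); a NULL return means the attached
filters consumed the packet:

	if (INET_ECN_set_ce(skb)) {
		q->stats.prob_mark++;
		skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock,
					to_free, &ret);
		if (!skb)
			return NET_XMIT_CN | ret;
	}
	...
congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock,
				to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;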

Patch #5 contains a selftest. I mentioned this test when pushing the RED
ECN nodrop mode and said that "I have no confidence in its portability to
[...] different configurations". That still holds. The backlog and
packet size are tuned to make the test deterministic. But it is better than
nothing, and on the boxes that I ran it on it does work and shows that
qevents work the way they are supposed to, and that their addition has not
broken the other tested features.

This patch set does not deal with offloading. The idea there is that a
driver will be able to figure out that a given block is used in a qevent
context by looking at the binder type. A future patch set will add a qdisc
pointer to struct flow_block_offload, which a driver will be able to
consult to glean the TC or other relevant attributes.
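
As a rough illustration of that idea (hypothetical driver code, not part
of this set), a block-bind callback could key off the new binder types:

	/* foo_setup_tc_block() is a made-up driver callback used here only
	 * to illustrate binder-type dispatch.
	 */
	static int foo_setup_tc_block(struct net_device *dev,
				      struct flow_block_offload *f)
	{
		switch (f->binder_type) {
		case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS:
			/* an ordinary clsact ingress block */
			break;
		case FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP:
		case FLOW_BLOCK_BINDER_TYPE_RED_MARK:
			/* a block bound to a RED qevent; offload the
			 * mirror/trap/sample actions accordingly
			 */
			break;
		default:
			return -EOPNOTSUPP;
		}
		/* ... register flow block callbacks, etc. ... */
		return 0;
	}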

Changes from RFC to v1:
- Move a "q = qdisc_priv(sch)" from patch #3 to patch #4.
- Fix a deadlock caused by mirroring a packet back to the same qdisc tree.
- Rename the "tail" qevent to "tail_drop".
- Adapt to the new 100-column standard.
- Add a selftest.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2020-06-29 17:08:28 -07:00
commit 989d957a8b
40 changed files with 822 additions and 84 deletions


@ -424,6 +424,8 @@ enum flow_block_binder_type {
FLOW_BLOCK_BINDER_TYPE_UNSPEC,
FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
FLOW_BLOCK_BINDER_TYPE_RED_MARK,
};
struct flow_block {


@ -32,6 +32,12 @@ struct tcf_block_ext_info {
u32 block_index;
};
struct tcf_qevent {
struct tcf_block *block;
struct tcf_block_ext_info info;
struct tcf_proto __rcu *filter_chain;
};
struct tcf_block_cb;
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
@ -553,6 +559,49 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
void *cb_priv, u32 *flags, unsigned int *in_hw_count);
unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
#ifdef CONFIG_NET_CLS_ACT
int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
enum flow_block_binder_type binder_type,
struct nlattr *block_index_attr,
struct netlink_ext_ack *extack);
void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch);
int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
struct netlink_ext_ack *extack);
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret);
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe);
#else
static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
enum flow_block_binder_type binder_type,
struct nlattr *block_index_attr,
struct netlink_ext_ack *extack)
{
return 0;
}
static inline void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
{
}
static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
struct netlink_ext_ack *extack)
{
return 0;
}
static inline struct sk_buff *
tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
{
return skb;
}
static inline int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
{
return 0;
}
#endif
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tcf_result *res;


@ -57,6 +57,7 @@ struct qdisc_skb_head {
struct Qdisc {
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *sch);
unsigned int flags;
@ -241,6 +242,7 @@ struct Qdisc_ops {
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
@ -788,11 +790,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
qdisc_calculate_pkt_len(skb, sch);
return sch->enqueue(skb, sch, to_free);
return sch->enqueue(skb, sch, root_lock, to_free);
}
static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,


@ -257,6 +257,8 @@ enum {
TCA_RED_STAB,
TCA_RED_MAX_P,
TCA_RED_FLAGS, /* bitfield32 */
TCA_RED_EARLY_DROP_BLOCK, /* u32 */
TCA_RED_MARK_BLOCK, /* u32 */
__TCA_RED_MAX,
};


@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK;
qdisc_run(q);
if (unlikely(to_free))
@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);


@ -3748,6 +3748,125 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_num_actions);
#ifdef CONFIG_NET_CLS_ACT
static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
u32 *p_block_index,
struct netlink_ext_ack *extack)
{
*p_block_index = nla_get_u32(block_index_attr);
if (!*p_block_index) {
NL_SET_ERR_MSG(extack, "Block number may not be zero");
return -EINVAL;
}
return 0;
}
int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
enum flow_block_binder_type binder_type,
struct nlattr *block_index_attr,
struct netlink_ext_ack *extack)
{
u32 block_index;
int err;
if (!block_index_attr)
return 0;
err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
if (err)
return err;
if (!block_index)
return 0;
qe->info.binder_type = binder_type;
qe->info.chain_head_change = tcf_chain_head_change_dflt;
qe->info.chain_head_change_priv = &qe->filter_chain;
qe->info.block_index = block_index;
return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
}
EXPORT_SYMBOL(tcf_qevent_init);
void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
{
if (qe->info.block_index)
tcf_block_put_ext(qe->block, sch, &qe->info);
}
EXPORT_SYMBOL(tcf_qevent_destroy);
int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
struct netlink_ext_ack *extack)
{
u32 block_index;
int err;
if (!block_index_attr)
return 0;
err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
if (err)
return err;
/* Bounce newly-configured block or change in block. */
if (block_index != qe->info.block_index) {
NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
return -EINVAL;
}
return 0;
}
EXPORT_SYMBOL(tcf_qevent_validate_change);
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
{
struct tcf_result cl_res;
struct tcf_proto *fl;
if (!qe->info.block_index)
return skb;
fl = rcu_dereference_bh(qe->filter_chain);
if (root_lock)
spin_unlock(root_lock);
switch (tcf_classify(skb, fl, &cl_res, false)) {
case TC_ACT_SHOT:
qdisc_qstats_drop(sch);
__qdisc_drop(skb, to_free);
*ret = __NET_XMIT_BYPASS;
return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
__qdisc_drop(skb, to_free);
*ret = __NET_XMIT_STOLEN;
return NULL;
case TC_ACT_REDIRECT:
skb_do_redirect(skb);
*ret = __NET_XMIT_STOLEN;
return NULL;
}
if (root_lock)
spin_lock(root_lock);
return skb;
}
EXPORT_SYMBOL(tcf_qevent_handle);
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
{
if (!qe->info.block_index)
return 0;
return nla_put_u32(skb, attr_name, qe->info.block_index);
}
EXPORT_SYMBOL(tcf_qevent_dump);
#endif
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);


@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
/* --------------------------- Qdisc operations ---------------------------- */
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
@ -432,7 +432,7 @@ done:
#endif
}
ret = qdisc_enqueue(skb, flow->q, to_free);
ret = qdisc_enqueue(skb, flow->q, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {


@ -13,7 +13,7 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
qdisc_drop(skb, sch, to_free);


@ -1687,7 +1687,7 @@ hash:
static void cake_reconfigure(struct Qdisc *sch);
static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cake_sched_data *q = qdisc_priv(sch);


@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return ret;
}
ret = qdisc_enqueue(skb, cl->q, to_free);
ret = qdisc_enqueue(skb, cl->q, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);


@ -77,7 +77,7 @@ struct cbs_sched_data {
s64 sendslope; /* in bytes/s */
s64 idleslope; /* in bytes/s */
struct qdisc_watchdog watchdog;
int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
@ -85,13 +85,13 @@ struct cbs_sched_data {
};
static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct Qdisc *child,
struct Qdisc *child, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
int err;
err = child->ops->enqueue(skb, child, to_free);
err = child->ops->enqueue(skb, child, root_lock, to_free);
if (err != NET_XMIT_SUCCESS)
return err;
@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_SUCCESS;
}
static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct Qdisc *qdisc = q->qdisc;
return cbs_child_enqueue(skb, sch, qdisc, to_free);
return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
}
static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
q->last = ktime_get_ns();
}
return cbs_child_enqueue(skb, sch, qdisc, to_free);
return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
}
static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
return q->enqueue(skb, sch, to_free);
return q->enqueue(skb, sch, root_lock, to_free);
}
/* timediff is in ns, slope is in bytes/s */


@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);


@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}
static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct codel_sched_data *q;


@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;


@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
/* --------------------------- Qdisc operations ---------------------------- */
static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
}
err = qdisc_enqueue(skb, p->q, to_free);
err = qdisc_enqueue(skb, p->q, root_lock, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);


@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
}
static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
struct sk_buff **to_free)
spinlock_t *root_lock, struct sk_buff **to_free)
{
struct etf_sched_data *q = qdisc_priv(sch);
struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;


@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
return &q->classes[band];
}
static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;


@ -16,7 +16,7 @@
/* 1 band FIFO pseudo-"scheduler" */
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
if (likely(sch->q.qlen < sch->limit))
@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int prev_backlog;


@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
}
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_sched_data *q = qdisc_priv(sch);


@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
return idx;
}
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);


@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow,
skb->next = NULL;
}
static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct fq_pie_sched_data *q = qdisc_priv(sch);


@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off);
cheaper.
*/
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
struct sk_buff **to_free)
{
__qdisc_drop(skb, to_free);
@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
return &priv->q[band];
}
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
struct sk_buff **to_free)
{
int band = prio2band[skb->priority & TC_PRIO_MAX];


@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table)
return false;
}
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct gred_sched_data *q = NULL;


@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
return -1;
}
static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct hfsc_class *cl;
@ -1545,7 +1545,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;


@ -368,7 +368,7 @@ static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
return bucket - q->buckets;
}
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct hhf_sched_data *q = qdisc_priv(sch);


@ -576,7 +576,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
cl->prio_activity = 0;
}
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
int uninitialized_var(ret);
@ -599,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
__qdisc_drop(skb, to_free);
return ret;
#endif
} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
} else if ((ret = qdisc_enqueue(skb, cl->leaf.q, root_lock,
to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
qdisc_qstats_drop(sch);


@ -57,7 +57,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
}
static int
multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct Qdisc *qdisc;
@ -74,7 +74,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
#endif
ret = qdisc_enqueue(skb, qdisc, to_free);
ret = qdisc_enqueue(skb, qdisc, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
return NET_XMIT_SUCCESS;


@ -431,7 +431,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
* NET_XMIT_DROP: queue length didn't change.
* NET_XMIT_SUCCESS: one skb was queued.
*/
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct netem_sched_data *q = qdisc_priv(sch);
@ -480,7 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
rootq->enqueue(skb2, rootq, to_free);
rootq->enqueue(skb2, rootq, root_lock, to_free);
q->duplicate = dupsave;
rc_drop = NET_XMIT_SUCCESS;
}
@ -604,7 +604,7 @@ finish_segs:
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
last_len = segs->len;
rc = qdisc_enqueue(segs, sch, to_free);
rc = qdisc_enqueue(segs, sch, root_lock, to_free);
if (rc != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(rc))
qdisc_qstats_drop(sch);
@ -720,7 +720,7 @@ deliver:
struct sk_buff *to_free = NULL;
int err;
err = qdisc_enqueue(skb, q->qdisc, &to_free);
err = qdisc_enqueue(skb, q->qdisc, NULL, &to_free);
kfree_skb_list(to_free);
if (err != NET_XMIT_SUCCESS &&
net_xmit_drop_count(err)) {


@ -82,7 +82,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
}
EXPORT_SYMBOL_GPL(pie_drop_early);
static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct pie_sched_data *q = qdisc_priv(sch);


@ -84,7 +84,7 @@ struct plug_sched_data {
u32 pkts_to_release;
};
static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct plug_sched_data *q = qdisc_priv(sch);


@ -65,8 +65,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
return q->queues[band];
}
static int
prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct Qdisc *qdisc;
@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
#endif
ret = qdisc_enqueue(skb, qdisc, to_free);
ret = qdisc_enqueue(skb, qdisc, root_lock, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->qstats.backlog += len;
sch->q.qlen++;


@ -1194,7 +1194,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
return agg;
}
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb), gso_segs;
@ -1225,7 +1225,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
if (net_xmit_drop_count(err)) {


@ -46,6 +46,8 @@ struct red_sched_data {
struct red_vars vars;
struct red_stats stats;
struct Qdisc *qdisc;
struct tcf_qevent qe_early_drop;
struct tcf_qevent qe_mark;
};
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
@ -65,7 +67,7 @@ static int red_use_nodrop(struct red_sched_data *q)
return q->flags & TC_RED_NODROP;
}
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct red_sched_data *q = qdisc_priv(sch);
@ -92,6 +94,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.prob_mark++;
skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret);
if (!skb)
return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.prob_drop++;
goto congestion_drop;
@ -109,6 +114,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (INET_ECN_set_ce(skb)) {
q->stats.forced_mark++;
skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret);
if (!skb)
return NET_XMIT_CN | ret;
} else if (!red_use_nodrop(q)) {
q->stats.forced_drop++;
goto congestion_drop;
@ -118,7 +126,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
break;
}
ret = qdisc_enqueue(skb, child, to_free);
ret = qdisc_enqueue(skb, child, root_lock, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
@ -129,6 +137,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return ret;
congestion_drop:
skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock, to_free, &ret);
if (!skb)
return NET_XMIT_CN | ret;
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
}
@ -202,6 +214,8 @@ static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
tcf_qevent_destroy(&q->qe_mark, sch);
tcf_qevent_destroy(&q->qe_early_drop, sch);
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
qdisc_put(q->qdisc);
@ -213,14 +227,15 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
struct netlink_ext_ack *extack)
{
struct Qdisc *old_child = NULL, *child = NULL;
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
struct nla_bitfield32 flags_bf;
struct tc_red_qopt *ctl;
unsigned char userbits;
@ -228,14 +243,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
int err;
u32 max_P;
if (opt == NULL)
return -EINVAL;
err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
NULL);
if (err < 0)
return err;
if (tb[TCA_RED_PARMS] == NULL ||
tb[TCA_RED_STAB] == NULL)
return -EINVAL;
@ -323,11 +330,74 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
int err;
if (!opt)
return -EINVAL;
err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
extack);
if (err < 0)
return err;
q->qdisc = &noop_qdisc;
q->sch = sch;
timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
return red_change(sch, opt, extack);
err = __red_change(sch, tb, extack);
if (err)
return err;
err = tcf_qevent_init(&q->qe_early_drop, sch,
FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
tb[TCA_RED_EARLY_DROP_BLOCK], extack);
if (err)
goto err_early_drop_init;
err = tcf_qevent_init(&q->qe_mark, sch,
FLOW_BLOCK_BINDER_TYPE_RED_MARK,
tb[TCA_RED_MARK_BLOCK], extack);
if (err)
goto err_mark_init;
return 0;
err_mark_init:
tcf_qevent_destroy(&q->qe_early_drop, sch);
err_early_drop_init:
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
qdisc_put(q->qdisc);
return err;
}
static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
int err;
if (!opt)
return -EINVAL;
err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
extack);
if (err < 0)
return err;
err = tcf_qevent_validate_change(&q->qe_early_drop,
tb[TCA_RED_EARLY_DROP_BLOCK], extack);
if (err)
return err;
err = tcf_qevent_validate_change(&q->qe_mark,
tb[TCA_RED_MARK_BLOCK], extack);
if (err)
return err;
return __red_change(sch, tb, extack);
}
static int red_dump_offload_stats(struct Qdisc *sch)
@ -371,7 +441,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
q->flags, TC_RED_SUPPORTED_FLAGS))
q->flags, TC_RED_SUPPORTED_FLAGS) ||
tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);


@ -276,7 +276,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
return false;
}
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
@ -399,7 +399,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
enqueue:
ret = qdisc_enqueue(skb, child, to_free);
ret = qdisc_enqueue(skb, child, root_lock, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;


@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q)
}
static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free)
{
struct sfq_sched_data *q = qdisc_priv(sch);
unsigned int hash, dropped;


@ -65,7 +65,7 @@ static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
return SKBPRIO_MAX_PRIORITY - 1;
}
static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;


@ -410,7 +410,7 @@ done:
return txtime;
}
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct taprio_sched *q = qdisc_priv(sch);
@ -435,7 +435,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return qdisc_enqueue(skb, child, to_free);
return qdisc_enqueue(skb, child, root_lock, to_free);
}
static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)


@ -187,7 +187,7 @@ static int tbf_offload_dump(struct Qdisc *sch)
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@ -206,7 +206,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc, to_free);
ret = qdisc_enqueue(segs, q->qdisc, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);
@ -221,7 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@ -231,10 +231,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (qdisc_pkt_len(skb) > q->max_size) {
if (skb_is_gso(skb) &&
skb_gso_validate_mac_len(skb, q->max_size))
return tbf_segment(skb, sch, to_free);
return tbf_segment(skb, sch, root_lock, to_free);
return qdisc_drop(skb, sch, to_free);
}
ret = qdisc_enqueue(skb, q->qdisc, to_free);
ret = qdisc_enqueue(skb, q->qdisc, root_lock, to_free);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
qdisc_qstats_drop(sch);


@ -72,8 +72,8 @@ struct teql_sched_data {
/* "teql*" qdisc routines */
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
{
struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);


@ -0,0 +1,492 @@
# SPDX-License-Identifier: GPL-2.0
# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
# within TBF shaper on $swp3. The two shapers have the same configuration, and
# thus the resulting stream should fill all available bandwidth on the latter
# shaper. A second stream is sent from H2 also via $swp3, and used to inject
# additional traffic. Since all available bandwidth is taken, this traffic has
# to go to backlog.
#
# +--------------------------+ +--------------------------+
# | H1 | | H2 |
# | + $h1 | | + $h2 |
# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
# | | TBF 10Mbps | | | |
# +-----|--------------------+ +-----|--------------------+
# | |
# +-----|------------------------------------------------|--------------------+
# | SW | | |
# | +--|------------------------------------------------|----------------+ |
# | | + $swp1 + $swp2 | |
# | | BR | |
# | | | |
# | | + $swp3 | |
# | | | TBF 10Mbps / RED | |
# | +--------------------------------|-----------------------------------+ |
# | | |
# +-----------------------------------|---------------------------------------+
# |
# +-----|--------------------+
# | H3 | |
# | + $h3 |
# | 192.0.2.3/28 |
# | |
# +--------------------------+
ALL_TESTS="
ping_ipv4
ecn_test
ecn_nodrop_test
red_test
red_qevent_test
ecn_qevent_test
"
NUM_NETIFS=6
CHECK_TC="yes"
source lib.sh
BACKLOG=30000
PKTSZ=1400
h1_create()
{
simple_if_init $h1 192.0.2.1/28
mtu_set $h1 10000
tc qdisc replace dev $h1 root handle 1: tbf \
rate 10Mbit burst 10K limit 1M
}
h1_destroy()
{
tc qdisc del dev $h1 root
mtu_restore $h1
simple_if_fini $h1 192.0.2.1/28
}
h2_create()
{
simple_if_init $h2 192.0.2.2/28
mtu_set $h2 10000
}
h2_destroy()
{
mtu_restore $h2
simple_if_fini $h2 192.0.2.2/28
}
h3_create()
{
simple_if_init $h3 192.0.2.3/28
mtu_set $h3 10000
}
h3_destroy()
{
mtu_restore $h3
simple_if_fini $h3 192.0.2.3/28
}
switch_create()
{
ip link add dev br up type bridge
ip link set dev $swp1 up master br
ip link set dev $swp2 up master br
ip link set dev $swp3 up master br
mtu_set $swp1 10000
mtu_set $swp2 10000
mtu_set $swp3 10000
tc qdisc replace dev $swp3 root handle 1: tbf \
rate 10Mbit burst 10K limit 1M
ip link add name _drop_test up type dummy
}
switch_destroy()
{
ip link del dev _drop_test
tc qdisc del dev $swp3 root
mtu_restore $h3
mtu_restore $h2
mtu_restore $h1
ip link set dev $swp3 down nomaster
ip link set dev $swp2 down nomaster
ip link set dev $swp1 down nomaster
ip link del dev br
}
setup_prepare()
{
h1=${NETIFS[p1]}
swp1=${NETIFS[p2]}
h2=${NETIFS[p3]}
swp2=${NETIFS[p4]}
swp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
h3_mac=$(mac_get $h3)
vrf_prepare
h1_create
h2_create
h3_create
switch_create
}
cleanup()
{
pre_cleanup
switch_destroy
h3_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
ping_ipv4()
{
ping_test $h1 192.0.2.3 " from host 1"
ping_test $h2 192.0.2.3 " from host 2"
}
get_qdisc_backlog()
{
qdisc_stats_get $swp3 11: .backlog
}
get_nmarked()
{
qdisc_stats_get $swp3 11: .marked
}
get_qdisc_npackets()
{
qdisc_stats_get $swp3 11: .packets
}
get_nmirrored()
{
link_stats_get _drop_test tx packets
}
send_packets()
{
local proto=$1; shift
local pkts=$1; shift
$MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
}
# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
# success. After 10 failed attempts it bails out and returns 1. It dumps the
# backlog size to stdout.
build_backlog()
{
local size=$1; shift
local proto=$1; shift
local i=0
while :; do
local cur=$(get_qdisc_backlog)
local diff=$((size - cur))
local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
if ((cur >= size)); then
echo $cur
return 0
elif ((i++ > 10)); then
echo $cur
return 1
fi
send_packets $proto $pkts "$@"
sleep 1
done
}
check_marking()
{
local cond=$1; shift
local npackets_0=$(get_qdisc_npackets)
local nmarked_0=$(get_nmarked)
sleep 5
local npackets_1=$(get_qdisc_npackets)
local nmarked_1=$(get_nmarked)
local nmarked_d=$((nmarked_1 - nmarked_0))
local npackets_d=$((npackets_1 - npackets_0))
local pct=$((100 * nmarked_d / npackets_d))
echo $pct
((pct $cond))
}
check_mirroring()
{
local cond=$1; shift
local npackets_0=$(get_qdisc_npackets)
local nmirrored_0=$(get_nmirrored)
sleep 5
local npackets_1=$(get_qdisc_npackets)
local nmirrored_1=$(get_nmirrored)
local nmirrored_d=$((nmirrored_1 - nmirrored_0))
local npackets_d=$((npackets_1 - npackets_0))
local pct=$((100 * nmirrored_d / npackets_d))
echo $pct
((pct $cond))
}
ecn_test_common()
{
local name=$1; shift
local limit=$1; shift
local backlog
local pct
# Build the below-the-limit backlog using UDP. We could use TCP just
# fine, but this way we get a proof that UDP is accepted when queue
# length is below the limit. The main stream is using TCP, and if the
# limit is misconfigured, we would see this traffic being ECN marked.
RET=0
backlog=$(build_backlog $((2 * limit / 3)) udp)
check_err $? "Could not build the requested backlog"
pct=$(check_marking "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "$name backlog < limit"
# Now push TCP, because non-TCP traffic would be early-dropped after the
# backlog crosses the limit, and we want to make sure that the backlog
# is above the limit.
RET=0
backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_marking ">= 95")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
log_test "$name backlog > limit"
}
do_ecn_test()
{
local limit=$1; shift
local name=ECN
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
sleep 1
ecn_test_common "$name" $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, it should all get dropped, and backlog
# building should fail.
RET=0
build_backlog $((2 * limit)) udp >/dev/null
check_fail $? "UDP traffic went into backlog instead of being early-dropped"
log_test "$name backlog > limit: UDP early-dropped"
stop_traffic
sleep 1
}
do_ecn_nodrop_test()
{
local limit=$1; shift
local name="ECN nodrop"
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
sleep 1
ecn_test_common "$name" $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, in nodrop mode, make sure it goes to
# backlog as well.
RET=0
build_backlog $((2 * limit)) udp >/dev/null
check_err $? "UDP traffic was early-dropped instead of getting into backlog"
log_test "$name backlog > limit: UDP not dropped"
stop_traffic
sleep 1
}
do_red_test()
{
local limit=$1; shift
local backlog
local pct
# Use ECN-capable TCP to verify there's no marking even though the queue
# is above limit.
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
# Pushing below the queue limit should work.
RET=0
backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_marking "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "RED backlog < limit"
# Pushing above should not.
RET=0
backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
check_fail $? "Traffic went into backlog instead of being early-dropped"
pct=$(check_marking "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "RED backlog > limit"
stop_traffic
sleep 1
}
do_red_qevent_test()
{
local limit=$1; shift
local backlog
local base
local now
local pct
RET=0
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t udp -q &
sleep 1
tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
action mirred egress mirror dev _drop_test
# Push to the queue until it's at the limit. The configured limit is
# rounded by the qdisc, so this is the best we can do to get to the real
# limit.
build_backlog $((3 * limit / 2)) udp >/dev/null
base=$(get_nmirrored)
send_packets udp 100
sleep 1
now=$(get_nmirrored)
((now >= base + 100))
check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
tc filter del block 10 pref 1234 handle 102 matchall
base=$(get_nmirrored)
send_packets udp 100
sleep 1
now=$(get_nmirrored)
((now == base))
check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
log_test "RED early_dropped packets mirrored"
stop_traffic
sleep 1
}
do_ecn_qevent_test()
{
local limit=$1; shift
local name=ECN
RET=0
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
sleep 1
tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
action mirred egress mirror dev _drop_test
backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_mirroring "== 0")
check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
pct=$(check_mirroring ">= 95")
check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
tc filter del block 10 pref 1234 handle 102 matchall
log_test "ECN marked packets mirrored"
stop_traffic
sleep 1
}
install_qdisc()
{
local -a args=("$@")
tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
limit 1M avpkt $PKTSZ probability 1 \
min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
sleep 1
}
uninstall_qdisc()
{
tc qdisc del dev $swp3 parent 1:1
}
ecn_test()
{
install_qdisc ecn
do_ecn_test $BACKLOG
uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
do_ecn_nodrop_test $BACKLOG
uninstall_qdisc
}
red_test()
{
install_qdisc
do_red_test $BACKLOG
uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
do_red_qevent_test $BACKLOG
uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
do_ecn_qevent_test $BACKLOG
uninstall_qdisc
}
trap cleanup EXIT
setup_prepare
setup_wait
tests_run
exit $EXIT_STATUS