mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	The current implementation of qdisc_destroy() decrements the Qdisc reference counter and only actually destroys the Qdisc if the reference counter value reaches zero. Rename qdisc_destroy() to qdisc_put() so that the name better describes the way in which this function is currently implemented and used. Extract the code that deallocates the Qdisc into a new private qdisc_destroy() function. It is intended to be shared between the regular qdisc_put() and its unlocked version that is introduced in the next patch in this series. Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			452 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			452 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * net/sched/sch_red.c	Random Early Detection queue.
 | 
						|
 *
 | 
						|
 *		This program is free software; you can redistribute it and/or
 | 
						|
 *		modify it under the terms of the GNU General Public License
 | 
						|
 *		as published by the Free Software Foundation; either version
 | 
						|
 *		2 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 | 
						|
 *
 | 
						|
 * Changes:
 | 
						|
 * J Hadi Salim 980914:	computation fixes
 | 
						|
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 | 
						|
 * J Hadi Salim 980816:  ECN support
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/types.h>
 | 
						|
#include <linux/kernel.h>
 | 
						|
#include <linux/skbuff.h>
 | 
						|
#include <net/pkt_sched.h>
 | 
						|
#include <net/pkt_cls.h>
 | 
						|
#include <net/inet_ecn.h>
 | 
						|
#include <net/red.h>
 | 
						|
 | 
						|
 | 
						|
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */
 | 
						|
 | 
						|
struct red_sched_data {
 | 
						|
	u32			limit;		/* HARD maximal queue length */
 | 
						|
	unsigned char		flags;
 | 
						|
	struct timer_list	adapt_timer;
 | 
						|
	struct Qdisc		*sch;
 | 
						|
	struct red_parms	parms;
 | 
						|
	struct red_vars		vars;
 | 
						|
	struct red_stats	stats;
 | 
						|
	struct Qdisc		*qdisc;
 | 
						|
};
 | 
						|
 | 
						|
static inline int red_use_ecn(struct red_sched_data *q)
 | 
						|
{
 | 
						|
	return q->flags & TC_RED_ECN;
 | 
						|
}
 | 
						|
 | 
						|
static inline int red_use_harddrop(struct red_sched_data *q)
 | 
						|
{
 | 
						|
	return q->flags & TC_RED_HARDDROP;
 | 
						|
}
 | 
						|
 | 
						|
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 | 
						|
		       struct sk_buff **to_free)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct Qdisc *child = q->qdisc;
 | 
						|
	int ret;
 | 
						|
 | 
						|
	q->vars.qavg = red_calc_qavg(&q->parms,
 | 
						|
				     &q->vars,
 | 
						|
				     child->qstats.backlog);
 | 
						|
 | 
						|
	if (red_is_idling(&q->vars))
 | 
						|
		red_end_of_idle_period(&q->vars);
 | 
						|
 | 
						|
	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
 | 
						|
	case RED_DONT_MARK:
 | 
						|
		break;
 | 
						|
 | 
						|
	case RED_PROB_MARK:
 | 
						|
		qdisc_qstats_overlimit(sch);
 | 
						|
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
 | 
						|
			q->stats.prob_drop++;
 | 
						|
			goto congestion_drop;
 | 
						|
		}
 | 
						|
 | 
						|
		q->stats.prob_mark++;
 | 
						|
		break;
 | 
						|
 | 
						|
	case RED_HARD_MARK:
 | 
						|
		qdisc_qstats_overlimit(sch);
 | 
						|
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
 | 
						|
		    !INET_ECN_set_ce(skb)) {
 | 
						|
			q->stats.forced_drop++;
 | 
						|
			goto congestion_drop;
 | 
						|
		}
 | 
						|
 | 
						|
		q->stats.forced_mark++;
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	ret = qdisc_enqueue(skb, child, to_free);
 | 
						|
	if (likely(ret == NET_XMIT_SUCCESS)) {
 | 
						|
		qdisc_qstats_backlog_inc(sch, skb);
 | 
						|
		sch->q.qlen++;
 | 
						|
	} else if (net_xmit_drop_count(ret)) {
 | 
						|
		q->stats.pdrop++;
 | 
						|
		qdisc_qstats_drop(sch);
 | 
						|
	}
 | 
						|
	return ret;
 | 
						|
 | 
						|
congestion_drop:
 | 
						|
	qdisc_drop(skb, sch, to_free);
 | 
						|
	return NET_XMIT_CN;
 | 
						|
}
 | 
						|
 | 
						|
static struct sk_buff *red_dequeue(struct Qdisc *sch)
 | 
						|
{
 | 
						|
	struct sk_buff *skb;
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct Qdisc *child = q->qdisc;
 | 
						|
 | 
						|
	skb = child->dequeue(child);
 | 
						|
	if (skb) {
 | 
						|
		qdisc_bstats_update(sch, skb);
 | 
						|
		qdisc_qstats_backlog_dec(sch, skb);
 | 
						|
		sch->q.qlen--;
 | 
						|
	} else {
 | 
						|
		if (!red_is_idling(&q->vars))
 | 
						|
			red_start_of_idle_period(&q->vars);
 | 
						|
	}
 | 
						|
	return skb;
 | 
						|
}
 | 
						|
 | 
						|
static struct sk_buff *red_peek(struct Qdisc *sch)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct Qdisc *child = q->qdisc;
 | 
						|
 | 
						|
	return child->ops->peek(child);
 | 
						|
}
 | 
						|
 | 
						|
static void red_reset(struct Qdisc *sch)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
 | 
						|
	qdisc_reset(q->qdisc);
 | 
						|
	sch->qstats.backlog = 0;
 | 
						|
	sch->q.qlen = 0;
 | 
						|
	red_restart(&q->vars);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * red_offload - push the RED configuration to, or remove it from, the device.
 * @sch:    RED qdisc
 * @enable: true issues TC_RED_REPLACE with the current parameters,
 *          false issues TC_RED_DESTROY
 *
 * Returns -EOPNOTSUPP when the device cannot offload or has no
 * ndo_setup_tc hook, otherwise whatever ndo_setup_tc() returns.
 */
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *priv = qdisc_priv(sch);
	struct net_device *netdev = qdisc_dev(sch);
	struct tc_red_qopt_offload req = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(netdev) || !netdev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (!enable) {
		req.command = TC_RED_DESTROY;
	} else {
		req.command = TC_RED_REPLACE;
		/* Thresholds are handed over shifted right by Wlog. */
		req.set.min = priv->parms.qth_min >> priv->parms.Wlog;
		req.set.max = priv->parms.qth_max >> priv->parms.Wlog;
		req.set.probability = priv->parms.max_P;
		req.set.is_ecn = red_use_ecn(priv);
		req.set.qstats = &sch->qstats;
	}

	return netdev->netdev_ops->ndo_setup_tc(netdev, TC_SETUP_QDISC_RED,
						&req);
}
 | 
						|
 | 
						|
static void red_destroy(struct Qdisc *sch)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
 | 
						|
	del_timer_sync(&q->adapt_timer);
 | 
						|
	red_offload(sch, false);
 | 
						|
	qdisc_put(q->qdisc);
 | 
						|
}
 | 
						|
 | 
						|
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 | 
						|
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 | 
						|
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
 | 
						|
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
 | 
						|
};
 | 
						|
 | 
						|
static int red_change(struct Qdisc *sch, struct nlattr *opt,
 | 
						|
		      struct netlink_ext_ack *extack)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct nlattr *tb[TCA_RED_MAX + 1];
 | 
						|
	struct tc_red_qopt *ctl;
 | 
						|
	struct Qdisc *child = NULL;
 | 
						|
	int err;
 | 
						|
	u32 max_P;
 | 
						|
 | 
						|
	if (opt == NULL)
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
 | 
						|
	if (err < 0)
 | 
						|
		return err;
 | 
						|
 | 
						|
	if (tb[TCA_RED_PARMS] == NULL ||
 | 
						|
	    tb[TCA_RED_STAB] == NULL)
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 | 
						|
 | 
						|
	ctl = nla_data(tb[TCA_RED_PARMS]);
 | 
						|
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
 | 
						|
		return -EINVAL;
 | 
						|
 | 
						|
	if (ctl->limit > 0) {
 | 
						|
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
 | 
						|
					 extack);
 | 
						|
		if (IS_ERR(child))
 | 
						|
			return PTR_ERR(child);
 | 
						|
 | 
						|
		/* child is fifo, no need to check for noop_qdisc */
 | 
						|
		qdisc_hash_add(child, true);
 | 
						|
	}
 | 
						|
 | 
						|
	sch_tree_lock(sch);
 | 
						|
	q->flags = ctl->flags;
 | 
						|
	q->limit = ctl->limit;
 | 
						|
	if (child) {
 | 
						|
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
 | 
						|
					  q->qdisc->qstats.backlog);
 | 
						|
		qdisc_put(q->qdisc);
 | 
						|
		q->qdisc = child;
 | 
						|
	}
 | 
						|
 | 
						|
	red_set_parms(&q->parms,
 | 
						|
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
 | 
						|
		      ctl->Plog, ctl->Scell_log,
 | 
						|
		      nla_data(tb[TCA_RED_STAB]),
 | 
						|
		      max_P);
 | 
						|
	red_set_vars(&q->vars);
 | 
						|
 | 
						|
	del_timer(&q->adapt_timer);
 | 
						|
	if (ctl->flags & TC_RED_ADAPTATIVE)
 | 
						|
		mod_timer(&q->adapt_timer, jiffies + HZ/2);
 | 
						|
 | 
						|
	if (!q->qdisc->q.qlen)
 | 
						|
		red_start_of_idle_period(&q->vars);
 | 
						|
 | 
						|
	sch_tree_unlock(sch);
 | 
						|
	red_offload(sch, true);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static inline void red_adaptative_timer(struct timer_list *t)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
 | 
						|
	struct Qdisc *sch = q->sch;
 | 
						|
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 | 
						|
 | 
						|
	spin_lock(root_lock);
 | 
						|
	red_adaptative_algo(&q->parms, &q->vars);
 | 
						|
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
 | 
						|
	spin_unlock(root_lock);
 | 
						|
}
 | 
						|
 | 
						|
static int red_init(struct Qdisc *sch, struct nlattr *opt,
 | 
						|
		    struct netlink_ext_ack *extack)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
 | 
						|
	q->qdisc = &noop_qdisc;
 | 
						|
	q->sch = sch;
 | 
						|
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
 | 
						|
	return red_change(sch, opt, extack);
 | 
						|
}
 | 
						|
 | 
						|
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
 | 
						|
{
 | 
						|
	struct net_device *dev = qdisc_dev(sch);
 | 
						|
	struct tc_red_qopt_offload hw_stats = {
 | 
						|
		.command = TC_RED_STATS,
 | 
						|
		.handle = sch->handle,
 | 
						|
		.parent = sch->parent,
 | 
						|
		{
 | 
						|
			.stats.bstats = &sch->bstats,
 | 
						|
			.stats.qstats = &sch->qstats,
 | 
						|
		},
 | 
						|
	};
 | 
						|
	int err;
 | 
						|
 | 
						|
	sch->flags &= ~TCQ_F_OFFLOADED;
 | 
						|
 | 
						|
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
 | 
						|
					    &hw_stats);
 | 
						|
	if (err == -EOPNOTSUPP)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	if (!err)
 | 
						|
		sch->flags |= TCQ_F_OFFLOADED;
 | 
						|
 | 
						|
	return err;
 | 
						|
}
 | 
						|
 | 
						|
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct nlattr *opts = NULL;
 | 
						|
	struct tc_red_qopt opt = {
 | 
						|
		.limit		= q->limit,
 | 
						|
		.flags		= q->flags,
 | 
						|
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
 | 
						|
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
 | 
						|
		.Wlog		= q->parms.Wlog,
 | 
						|
		.Plog		= q->parms.Plog,
 | 
						|
		.Scell_log	= q->parms.Scell_log,
 | 
						|
	};
 | 
						|
	int err;
 | 
						|
 | 
						|
	err = red_dump_offload_stats(sch, &opt);
 | 
						|
	if (err)
 | 
						|
		goto nla_put_failure;
 | 
						|
 | 
						|
	opts = nla_nest_start(skb, TCA_OPTIONS);
 | 
						|
	if (opts == NULL)
 | 
						|
		goto nla_put_failure;
 | 
						|
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
 | 
						|
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
 | 
						|
		goto nla_put_failure;
 | 
						|
	return nla_nest_end(skb, opts);
 | 
						|
 | 
						|
nla_put_failure:
 | 
						|
	nla_nest_cancel(skb, opts);
 | 
						|
	return -EMSGSIZE;
 | 
						|
}
 | 
						|
 | 
						|
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	struct net_device *dev = qdisc_dev(sch);
 | 
						|
	struct tc_red_xstats st = {0};
 | 
						|
 | 
						|
	if (sch->flags & TCQ_F_OFFLOADED) {
 | 
						|
		struct tc_red_qopt_offload hw_stats_request = {
 | 
						|
			.command = TC_RED_XSTATS,
 | 
						|
			.handle = sch->handle,
 | 
						|
			.parent = sch->parent,
 | 
						|
			{
 | 
						|
				.xstats = &q->stats,
 | 
						|
			},
 | 
						|
		};
 | 
						|
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
 | 
						|
					      &hw_stats_request);
 | 
						|
	}
 | 
						|
	st.early = q->stats.prob_drop + q->stats.forced_drop;
 | 
						|
	st.pdrop = q->stats.pdrop;
 | 
						|
	st.other = q->stats.other;
 | 
						|
	st.marked = q->stats.prob_mark + q->stats.forced_mark;
 | 
						|
 | 
						|
	return gnet_stats_copy_app(d, &st, sizeof(st));
 | 
						|
}
 | 
						|
 | 
						|
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
 | 
						|
			  struct sk_buff *skb, struct tcmsg *tcm)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
 | 
						|
	tcm->tcm_handle |= TC_H_MIN(1);
 | 
						|
	tcm->tcm_info = q->qdisc->handle;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 | 
						|
		     struct Qdisc **old, struct netlink_ext_ack *extack)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
 | 
						|
	if (new == NULL)
 | 
						|
		new = &noop_qdisc;
 | 
						|
 | 
						|
	*old = qdisc_replace(sch, new, &q->qdisc);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
 | 
						|
{
 | 
						|
	struct red_sched_data *q = qdisc_priv(sch);
 | 
						|
	return q->qdisc;
 | 
						|
}
 | 
						|
 | 
						|
/* Class lookup: always yields 1, regardless of @sch and @classid. */
static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	const unsigned long only_class = 1;

	return only_class;
}
 | 
						|
 | 
						|
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 | 
						|
{
 | 
						|
	if (!walker->stop) {
 | 
						|
		if (walker->count >= walker->skip)
 | 
						|
			if (walker->fn(sch, 1, walker) < 0) {
 | 
						|
				walker->stop = 1;
 | 
						|
				return;
 | 
						|
			}
 | 
						|
		walker->count++;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static const struct Qdisc_class_ops red_class_ops = {
 | 
						|
	.graft		=	red_graft,
 | 
						|
	.leaf		=	red_leaf,
 | 
						|
	.find		=	red_find,
 | 
						|
	.walk		=	red_walk,
 | 
						|
	.dump		=	red_dump_class,
 | 
						|
};
 | 
						|
 | 
						|
/* Qdisc operations table registered under the id "red". */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id         = "red",
	.priv_size  = sizeof(struct red_sched_data),
	.cl_ops     = &red_class_ops,
	.enqueue    = red_enqueue,
	.dequeue    = red_dequeue,
	.peek       = red_peek,
	.init       = red_init,
	.reset      = red_reset,
	.destroy    = red_destroy,
	.change     = red_change,
	.dump       = red_dump,
	.dump_stats = red_dump_stats,
	.owner      = THIS_MODULE,
};
 | 
						|
 | 
						|
/* Module entry point: register the RED qdisc ops and return the result. */
static int __init red_module_init(void)
{
	int rc = register_qdisc(&red_qdisc_ops);

	return rc;
}
 | 
						|
 | 
						|
/* Module exit point: unregister the RED qdisc ops. */
static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
 | 
						|
 | 
						|
module_init(red_module_init)
 | 
						|
module_exit(red_module_exit)
 | 
						|
 | 
						|
MODULE_LICENSE("GPL");
 |