forked from mirrors/linux
		
	net: sched: Protect Qdisc::bstats with u64_stats
The not-per-CPU variant of qdisc tc (traffic control) statistics, Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running sequence counter.

This sequence counter is used for reliably protecting bstats reads from parallel writes. Meanwhile, the seqcount's write section covers a much wider area than bstats update: qdisc_run_begin() => qdisc_run_end().

That read/write section asymmetry can lead to needless retries of the read section. To prepare for removing the Qdisc::running sequence counter altogether, introduce a u64_stats sync point inside bstats instead.

Modify _bstats_update() to start/end the bstats u64_stats write section. For bisectability, and finer commits granularity, the bstats read section is still protected with a Qdisc::running read/retry loop and qdisc_run_begin/end() still starts/ends that seqcount write section. Once all call sites are modified to use _bstats_update(), the Qdisc::running seqcount will be removed and bstats read/retry loop will be modified to utilize the internal u64_stats sync point.

Note, using u64_stats implies no sequence counter protection for 64-bit architectures. This can lead to the statistics "packets" vs. "bytes" values getting out of sync on rare occasions. The individual values will still be valid.

[bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.]

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									f2efdb1792
								
							
						
					
					
						commit
						67c9e6270f
					
				
					 17 changed files with 39 additions and 10 deletions
				
			
		|  | @ -11,6 +11,7 @@ | ||||||
| struct gnet_stats_basic_packed { | struct gnet_stats_basic_packed { | ||||||
| 	__u64	bytes; | 	__u64	bytes; | ||||||
| 	__u64	packets; | 	__u64	packets; | ||||||
|  | 	struct u64_stats_sync syncp; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct gnet_stats_basic_cpu { | struct gnet_stats_basic_cpu { | ||||||
|  | @ -34,6 +35,7 @@ struct gnet_dump { | ||||||
| 	struct tc_stats   tc_stats; | 	struct tc_stats   tc_stats; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b); | ||||||
| int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, | int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, | ||||||
| 			  struct gnet_dump *d, int padattr); | 			  struct gnet_dump *d, int padattr); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -852,8 +852,10 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, | ||||||
| static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, | static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, | ||||||
| 				  __u64 bytes, __u32 packets) | 				  __u64 bytes, __u32 packets) | ||||||
| { | { | ||||||
|  | 	u64_stats_update_begin(&bstats->syncp); | ||||||
| 	bstats->bytes += bytes; | 	bstats->bytes += bytes; | ||||||
| 	bstats->packets += packets; | 	bstats->packets += packets; | ||||||
|  | 	u64_stats_update_end(&bstats->syncp); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline void bstats_update(struct gnet_stats_basic_packed *bstats, | static inline void bstats_update(struct gnet_stats_basic_packed *bstats, | ||||||
|  |  | ||||||
|  | @ -62,7 +62,7 @@ struct net_rate_estimator { | ||||||
| static void est_fetch_counters(struct net_rate_estimator *e, | static void est_fetch_counters(struct net_rate_estimator *e, | ||||||
| 			       struct gnet_stats_basic_packed *b) | 			       struct gnet_stats_basic_packed *b) | ||||||
| { | { | ||||||
| 	memset(b, 0, sizeof(*b)); | 	gnet_stats_basic_packed_init(b); | ||||||
| 	if (e->stats_lock) | 	if (e->stats_lock) | ||||||
| 		spin_lock(e->stats_lock); | 		spin_lock(e->stats_lock); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -18,7 +18,7 @@ | ||||||
| #include <linux/gen_stats.h> | #include <linux/gen_stats.h> | ||||||
| #include <net/netlink.h> | #include <net/netlink.h> | ||||||
| #include <net/gen_stats.h> | #include <net/gen_stats.h> | ||||||
| 
 | #include <net/sch_generic.h> | ||||||
| 
 | 
 | ||||||
| static inline int | static inline int | ||||||
| gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) | gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) | ||||||
|  | @ -114,6 +114,15 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(gnet_stats_start_copy); | EXPORT_SYMBOL(gnet_stats_start_copy); | ||||||
| 
 | 
 | ||||||
|  | /* Must not be inlined, due to u64_stats seqcount_t lockdep key */ | ||||||
|  | void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b) | ||||||
|  | { | ||||||
|  | 	b->bytes = 0; | ||||||
|  | 	b->packets = 0; | ||||||
|  | 	u64_stats_init(&b->syncp); | ||||||
|  | } | ||||||
|  | EXPORT_SYMBOL(gnet_stats_basic_packed_init); | ||||||
|  | 
 | ||||||
| static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, | static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, | ||||||
| 				     struct gnet_stats_basic_cpu __percpu *cpu) | 				     struct gnet_stats_basic_cpu __percpu *cpu) | ||||||
| { | { | ||||||
|  | @ -167,8 +176,9 @@ ___gnet_stats_copy_basic(const seqcount_t *running, | ||||||
| 			 struct gnet_stats_basic_packed *b, | 			 struct gnet_stats_basic_packed *b, | ||||||
| 			 int type) | 			 int type) | ||||||
| { | { | ||||||
| 	struct gnet_stats_basic_packed bstats = {0}; | 	struct gnet_stats_basic_packed bstats; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&bstats); | ||||||
| 	gnet_stats_add_basic(running, &bstats, cpu, b); | 	gnet_stats_add_basic(running, &bstats, cpu, b); | ||||||
| 
 | 
 | ||||||
| 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) { | 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) { | ||||||
|  |  | ||||||
|  | @ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) | ||||||
| 	if (!est) | 	if (!est) | ||||||
| 		goto err1; | 		goto err1; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&est->bstats); | ||||||
| 	strlcpy(est->name, info->name, sizeof(est->name)); | 	strlcpy(est->name, info->name, sizeof(est->name)); | ||||||
| 	spin_lock_init(&est->lock); | 	spin_lock_init(&est->lock); | ||||||
| 	est->refcnt		= 1; | 	est->refcnt		= 1; | ||||||
|  |  | ||||||
|  | @ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, | ||||||
| 		if (!p->cpu_qstats) | 		if (!p->cpu_qstats) | ||||||
| 			goto err3; | 			goto err3; | ||||||
| 	} | 	} | ||||||
|  | 	gnet_stats_basic_packed_init(&p->tcfa_bstats); | ||||||
|  | 	gnet_stats_basic_packed_init(&p->tcfa_bstats_hw); | ||||||
| 	spin_lock_init(&p->tcfa_lock); | 	spin_lock_init(&p->tcfa_lock); | ||||||
| 	p->tcfa_index = index; | 	p->tcfa_index = index; | ||||||
| 	p->tcfa_tm.install = jiffies; | 	p->tcfa_tm.install = jiffies; | ||||||
|  |  | ||||||
|  | @ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, | ||||||
| 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); | 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); | ||||||
| 	INIT_LIST_HEAD(&p->flows); | 	INIT_LIST_HEAD(&p->flows); | ||||||
| 	INIT_LIST_HEAD(&p->link.list); | 	INIT_LIST_HEAD(&p->link.list); | ||||||
|  | 	gnet_stats_basic_packed_init(&p->link.bstats); | ||||||
| 	list_add(&p->link.list, &p->flows); | 	list_add(&p->link.list, &p->flows); | ||||||
| 	p->link.q = qdisc_create_dflt(sch->dev_queue, | 	p->link.q = qdisc_create_dflt(sch->dev_queue, | ||||||
| 				      &pfifo_qdisc_ops, sch->handle, extack); | 				      &pfifo_qdisc_ops, sch->handle, extack); | ||||||
|  |  | ||||||
|  | @ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | ||||||
| 	if (cl == NULL) | 	if (cl == NULL) | ||||||
| 		goto failure; | 		goto failure; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&cl->bstats); | ||||||
| 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); | 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); | ||||||
| 	if (err) { | 	if (err) { | ||||||
| 		kfree(cl); | 		kfree(cl); | ||||||
|  |  | ||||||
|  | @ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | ||||||
| 	if (cl == NULL) | 	if (cl == NULL) | ||||||
| 		return -ENOBUFS; | 		return -ENOBUFS; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&cl->bstats); | ||||||
| 	cl->common.classid = classid; | 	cl->common.classid = classid; | ||||||
| 	cl->quantum	   = quantum; | 	cl->quantum	   = quantum; | ||||||
| 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue, | 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue, | ||||||
|  |  | ||||||
|  | @ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, | ||||||
| 		q->classes[i].qdisc = NULL; | 		q->classes[i].qdisc = NULL; | ||||||
| 		q->classes[i].quantum = 0; | 		q->classes[i].quantum = 0; | ||||||
| 		q->classes[i].deficit = 0; | 		q->classes[i].deficit = 0; | ||||||
| 		memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats)); | 		gnet_stats_basic_packed_init(&q->classes[i].bstats); | ||||||
| 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); | 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); | ||||||
| 	} | 	} | ||||||
| 	return 0; | 	return 0; | ||||||
|  |  | ||||||
|  | @ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | ||||||
| 	__skb_queue_head_init(&sch->gso_skb); | 	__skb_queue_head_init(&sch->gso_skb); | ||||||
| 	__skb_queue_head_init(&sch->skb_bad_txq); | 	__skb_queue_head_init(&sch->skb_bad_txq); | ||||||
| 	qdisc_skb_head_init(&sch->q); | 	qdisc_skb_head_init(&sch->q); | ||||||
|  | 	gnet_stats_basic_packed_init(&sch->bstats); | ||||||
| 	spin_lock_init(&sch->q.lock); | 	spin_lock_init(&sch->q.lock); | ||||||
| 
 | 
 | ||||||
| 	if (ops->static_flags & TCQ_F_CPUSTATS) { | 	if (ops->static_flags & TCQ_F_CPUSTATS) { | ||||||
|  |  | ||||||
|  | @ -364,9 +364,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch) | ||||||
| 	hw_stats->handle = sch->handle; | 	hw_stats->handle = sch->handle; | ||||||
| 	hw_stats->parent = sch->parent; | 	hw_stats->parent = sch->parent; | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < MAX_DPs; i++) | 	for (i = 0; i < MAX_DPs; i++) { | ||||||
|  | 		gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]); | ||||||
| 		if (table->tab[i]) | 		if (table->tab[i]) | ||||||
| 			hw_stats->stats.xstats[i] = &table->tab[i]->stats; | 			hw_stats->stats.xstats[i] = &table->tab[i]->stats; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); | 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); | ||||||
| 	/* Even if driver returns failure adjust the stats - in case offload
 | 	/* Even if driver returns failure adjust the stats - in case offload
 | ||||||
|  |  | ||||||
|  | @ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, | ||||||
| 	if (err) | 	if (err) | ||||||
| 		return err; | 		return err; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&q->root.bstats); | ||||||
| 	q->root.cl_common.classid = sch->handle; | 	q->root.cl_common.classid = sch->handle; | ||||||
| 	q->root.sched   = q; | 	q->root.sched   = q; | ||||||
| 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, | 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, | ||||||
|  |  | ||||||
|  | @ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, | ||||||
| 	struct htb_class *c; | 	struct htb_class *c; | ||||||
| 	unsigned int i; | 	unsigned int i; | ||||||
| 
 | 
 | ||||||
| 	memset(&cl->bstats, 0, sizeof(cl->bstats)); | 	gnet_stats_basic_packed_init(&cl->bstats); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < q->clhash.hashsize; i++) { | 	for (i = 0; i < q->clhash.hashsize; i++) { | ||||||
| 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { | 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { | ||||||
|  | @ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) | ||||||
| 			if (cl->leaf.q) | 			if (cl->leaf.q) | ||||||
| 				cl->bstats = cl->leaf.q->bstats; | 				cl->bstats = cl->leaf.q->bstats; | ||||||
| 			else | 			else | ||||||
| 				memset(&cl->bstats, 0, sizeof(cl->bstats)); | 				gnet_stats_basic_packed_init(&cl->bstats); | ||||||
| 			cl->bstats.bytes += cl->bstats_bias.bytes; | 			cl->bstats.bytes += cl->bstats_bias.bytes; | ||||||
| 			cl->bstats.packets += cl->bstats_bias.packets; | 			cl->bstats.packets += cl->bstats_bias.packets; | ||||||
| 		} else { | 		} else { | ||||||
|  | @ -1849,6 +1849,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | ||||||
| 		if (!cl) | 		if (!cl) | ||||||
| 			goto failure; | 			goto failure; | ||||||
| 
 | 
 | ||||||
|  | 		gnet_stats_basic_packed_init(&cl->bstats); | ||||||
|  | 		gnet_stats_basic_packed_init(&cl->bstats_bias); | ||||||
|  | 
 | ||||||
| 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); | 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); | ||||||
| 		if (err) { | 		if (err) { | ||||||
| 			kfree(cl); | 			kfree(cl); | ||||||
|  |  | ||||||
|  | @ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) | ||||||
| 	unsigned int ntx; | 	unsigned int ntx; | ||||||
| 
 | 
 | ||||||
| 	sch->q.qlen = 0; | 	sch->q.qlen = 0; | ||||||
| 	memset(&sch->bstats, 0, sizeof(sch->bstats)); | 	gnet_stats_basic_packed_init(&sch->bstats); | ||||||
| 	memset(&sch->qstats, 0, sizeof(sch->qstats)); | 	memset(&sch->qstats, 0, sizeof(sch->qstats)); | ||||||
| 
 | 
 | ||||||
| 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 | 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 | ||||||
|  |  | ||||||
|  | @ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) | ||||||
| 	unsigned int ntx, tc; | 	unsigned int ntx, tc; | ||||||
| 
 | 
 | ||||||
| 	sch->q.qlen = 0; | 	sch->q.qlen = 0; | ||||||
| 	memset(&sch->bstats, 0, sizeof(sch->bstats)); | 	gnet_stats_basic_packed_init(&sch->bstats); | ||||||
| 	memset(&sch->qstats, 0, sizeof(sch->qstats)); | 	memset(&sch->qstats, 0, sizeof(sch->qstats)); | ||||||
| 
 | 
 | ||||||
| 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 | 	/* MQ supports lockless qdiscs. However, statistics accounting needs
 | ||||||
|  | @ -500,10 +500,11 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, | ||||||
| 		int i; | 		int i; | ||||||
| 		__u32 qlen; | 		__u32 qlen; | ||||||
| 		struct gnet_stats_queue qstats = {0}; | 		struct gnet_stats_queue qstats = {0}; | ||||||
| 		struct gnet_stats_basic_packed bstats = {0}; | 		struct gnet_stats_basic_packed bstats; | ||||||
| 		struct net_device *dev = qdisc_dev(sch); | 		struct net_device *dev = qdisc_dev(sch); | ||||||
| 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; | 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; | ||||||
| 
 | 
 | ||||||
|  | 		gnet_stats_basic_packed_init(&bstats); | ||||||
| 		/* Drop lock here it will be reclaimed before touching
 | 		/* Drop lock here it will be reclaimed before touching
 | ||||||
| 		 * statistics this is required because the d->lock we | 		 * statistics this is required because the d->lock we | ||||||
| 		 * hold here is the look on dev_queue->qdisc_sleeping | 		 * hold here is the look on dev_queue->qdisc_sleeping | ||||||
|  |  | ||||||
|  | @ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | ||||||
| 	if (cl == NULL) | 	if (cl == NULL) | ||||||
| 		return -ENOBUFS; | 		return -ENOBUFS; | ||||||
| 
 | 
 | ||||||
|  | 	gnet_stats_basic_packed_init(&cl->bstats); | ||||||
| 	cl->common.classid = classid; | 	cl->common.classid = classid; | ||||||
| 	cl->deficit = lmax; | 	cl->deficit = lmax; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Ahmed S. Darwish
						Ahmed S. Darwish