forked from mirrors/linux
		
	dropmon: add ability to detect when hardware drops rx packets
Patch to add the ability to detect drops in hardware interfaces via dropwatch. Adds a tracepoint to net_rx_action to signal every time a napi instance is polled. The dropmon code then periodically checks to see if the rx_frames counter has changed, and if so, adds a drop notification to the netlink protocol, using the reserved all-0's vector to indicate the drop location was in hardware, rather than somewhere in the code. Signed-off-by: Neil Horman <nhorman@tuxdriver.com> include/linux/net_dropmon.h | 8 ++ include/trace/napi.h | 11 +++ net/core/dev.c | 5 + net/core/drop_monitor.c | 124 ++++++++++++++++++++++++++++++++++++++++++-- net/core/net-traces.c | 4 + net/core/netpoll.c | 2 6 files changed, 149 insertions(+), 5 deletions(-) Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									d95ed9275e
								
							
						
					
					
						commit
						4ea7e38696
					
				
					 6 changed files with 149 additions and 5 deletions
				
			
		|  | @ -2,12 +2,20 @@ | ||||||
| #define __NET_DROPMON_H | #define __NET_DROPMON_H | ||||||
| 
 | 
 | ||||||
| #include <linux/netlink.h> | #include <linux/netlink.h> | ||||||
|  | #include <linux/types.h> | ||||||
| 
 | 
 | ||||||
| struct net_dm_drop_point { | struct net_dm_drop_point { | ||||||
| 	__u8 pc[8]; | 	__u8 pc[8]; | ||||||
| 	__u32 count; | 	__u32 count; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
/*
 * is_drop_point_hw - test whether a drop point refers to a hardware drop.
 * @x: the 8-byte location vector of a drop point (e.g. the pc member of
 *     struct net_dm_drop_point).
 *
 * The all-zero vector is reserved to mean "dropped in hardware", so this
 * evaluates to 1 when every byte of @x is zero and to 0 otherwise.
 *
 * The previous definition could not be used: it had a stray token in the
 * loop header, OR-ed into an uninitialized accumulator, and was wrapped in
 * do { } while (0), which is a statement and cannot produce a value.
 * It is now a plain expression; note that @x is evaluated more than once,
 * so it must not have side effects.
 */
#define is_drop_point_hw(x) \
	((((x)[0]) | ((x)[1]) | ((x)[2]) | ((x)[3]) | \
	  ((x)[4]) | ((x)[5]) | ((x)[6]) | ((x)[7])) == 0)
|  | 
 | ||||||
| #define NET_DM_CFG_VERSION  0 | #define NET_DM_CFG_VERSION  0 | ||||||
| #define NET_DM_CFG_ALERT_COUNT  1 | #define NET_DM_CFG_ALERT_COUNT  1 | ||||||
| #define NET_DM_CFG_ALERT_DELAY 2 | #define NET_DM_CFG_ALERT_DELAY 2 | ||||||
|  |  | ||||||
							
								
								
									
										11
									
								
								include/trace/napi.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								include/trace/napi.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | ||||||
|  | #ifndef _TRACE_NAPI_H_ | ||||||
|  | #define _TRACE_NAPI_H_ | ||||||
|  | 
 | ||||||
|  | #include <linux/netdevice.h> | ||||||
|  | #include <linux/tracepoint.h> | ||||||
|  | 
 | ||||||
|  | DECLARE_TRACE(napi_poll, | ||||||
|  | 	TP_PROTO(struct napi_struct *napi), | ||||||
|  | 	TP_ARGS(napi)); | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | @ -126,6 +126,7 @@ | ||||||
| #include <linux/in.h> | #include <linux/in.h> | ||||||
| #include <linux/jhash.h> | #include <linux/jhash.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
|  | #include <trace/napi.h> | ||||||
| 
 | 
 | ||||||
| #include "net-sysfs.h" | #include "net-sysfs.h" | ||||||
| 
 | 
 | ||||||
|  | @ -2771,8 +2772,10 @@ static void net_rx_action(struct softirq_action *h) | ||||||
| 		 * accidently calling ->poll() when NAPI is not scheduled. | 		 * accidently calling ->poll() when NAPI is not scheduled. | ||||||
| 		 */ | 		 */ | ||||||
| 		work = 0; | 		work = 0; | ||||||
| 		if (test_bit(NAPI_STATE_SCHED, &n->state)) | 		if (test_bit(NAPI_STATE_SCHED, &n->state)) { | ||||||
| 			work = n->poll(n, weight); | 			work = n->poll(n, weight); | ||||||
|  | 			trace_napi_poll(n); | ||||||
|  | 		} | ||||||
| 
 | 
 | ||||||
| 		WARN_ON_ONCE(work > weight); | 		WARN_ON_ONCE(work > weight); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -22,8 +22,10 @@ | ||||||
| #include <linux/timer.h> | #include <linux/timer.h> | ||||||
| #include <linux/bitops.h> | #include <linux/bitops.h> | ||||||
| #include <net/genetlink.h> | #include <net/genetlink.h> | ||||||
|  | #include <net/netevent.h> | ||||||
| 
 | 
 | ||||||
| #include <trace/skb.h> | #include <trace/skb.h> | ||||||
|  | #include <trace/napi.h> | ||||||
| 
 | 
 | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
| 
 | 
 | ||||||
|  | @ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused); | ||||||
|  * and the work handle that will send up |  * and the work handle that will send up | ||||||
|  * netlink alerts |  * netlink alerts | ||||||
|  */ |  */ | ||||||
| struct sock *dm_sock; | static int trace_state = TRACE_OFF; | ||||||
|  | static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; | ||||||
| 
 | 
 | ||||||
| struct per_cpu_dm_data { | struct per_cpu_dm_data { | ||||||
| 	struct work_struct dm_alert_work; | 	struct work_struct dm_alert_work; | ||||||
|  | @ -47,6 +50,13 @@ struct per_cpu_dm_data { | ||||||
| 	struct timer_list send_timer; | 	struct timer_list send_timer; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct dm_hw_stat_delta { | ||||||
|  | 	struct net_device *dev; | ||||||
|  | 	struct list_head list; | ||||||
|  | 	struct rcu_head rcu; | ||||||
|  | 	unsigned long last_drop_val; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| static struct genl_family net_drop_monitor_family = { | static struct genl_family net_drop_monitor_family = { | ||||||
| 	.id             = GENL_ID_GENERATE, | 	.id             = GENL_ID_GENERATE, | ||||||
| 	.hdrsize        = 0, | 	.hdrsize        = 0, | ||||||
|  | @ -59,7 +69,8 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); | ||||||
| 
 | 
 | ||||||
| static int dm_hit_limit = 64; | static int dm_hit_limit = 64; | ||||||
| static int dm_delay = 1; | static int dm_delay = 1; | ||||||
| 
 | static unsigned long dm_hw_check_delta = 2*HZ; | ||||||
|  | static LIST_HEAD(hw_stats_list); | ||||||
| 
 | 
 | ||||||
| static void reset_per_cpu_data(struct per_cpu_dm_data *data) | static void reset_per_cpu_data(struct per_cpu_dm_data *data) | ||||||
| { | { | ||||||
|  | @ -115,7 +126,7 @@ static void sched_send_work(unsigned long unused) | ||||||
| 	schedule_work(&data->dm_alert_work); | 	schedule_work(&data->dm_alert_work); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) | static void trace_drop_common(struct sk_buff *skb, void *location) | ||||||
| { | { | ||||||
| 	struct net_dm_alert_msg *msg; | 	struct net_dm_alert_msg *msg; | ||||||
| 	struct nlmsghdr *nlh; | 	struct nlmsghdr *nlh; | ||||||
|  | @ -159,24 +170,80 @@ static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) | ||||||
| 	return; | 	return; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/*
 * kfree_skb tracepoint probe: forwards the freed skb and the drop
 * location straight to the shared drop-recording path.
 */
static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
{
	trace_drop_common(skb, location);
}
|  | 
 | ||||||
|  | static void trace_napi_poll_hit(struct napi_struct *napi) | ||||||
|  | { | ||||||
|  | 	struct dm_hw_stat_delta *new_stat; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Ratelimit our check time to dm_hw_check_delta jiffies | ||||||
|  | 	 */ | ||||||
|  | 	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	rcu_read_lock(); | ||||||
|  | 	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { | ||||||
|  | 		if ((new_stat->dev == napi->dev)  && | ||||||
|  | 		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { | ||||||
|  | 			trace_drop_common(NULL, NULL); | ||||||
|  | 			new_stat->last_drop_val = napi->dev->stats.rx_dropped; | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	rcu_read_unlock(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | static void free_dm_hw_stat(struct rcu_head *head) | ||||||
|  | { | ||||||
|  | 	struct dm_hw_stat_delta *n; | ||||||
|  | 	n = container_of(head, struct dm_hw_stat_delta, rcu); | ||||||
|  | 	kfree(n); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int set_all_monitor_traces(int state) | static int set_all_monitor_traces(int state) | ||||||
| { | { | ||||||
| 	int rc = 0; | 	int rc = 0; | ||||||
|  | 	struct dm_hw_stat_delta *new_stat = NULL; | ||||||
|  | 	struct dm_hw_stat_delta *temp; | ||||||
|  | 
 | ||||||
|  | 	spin_lock(&trace_state_lock); | ||||||
| 
 | 
 | ||||||
| 	switch (state) { | 	switch (state) { | ||||||
| 	case TRACE_ON: | 	case TRACE_ON: | ||||||
| 		rc |= register_trace_kfree_skb(trace_kfree_skb_hit); | 		rc |= register_trace_kfree_skb(trace_kfree_skb_hit); | ||||||
|  | 		rc |= register_trace_napi_poll(trace_napi_poll_hit); | ||||||
| 		break; | 		break; | ||||||
| 	case TRACE_OFF: | 	case TRACE_OFF: | ||||||
| 		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); | 		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); | ||||||
|  | 		rc |= unregister_trace_napi_poll(trace_napi_poll_hit); | ||||||
| 
 | 
 | ||||||
| 		tracepoint_synchronize_unregister(); | 		tracepoint_synchronize_unregister(); | ||||||
|  | 
 | ||||||
|  | 		/*
 | ||||||
|  | 		 * Clean the device list | ||||||
|  | 		 */ | ||||||
|  | 		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { | ||||||
|  | 			if (new_stat->dev == NULL) { | ||||||
|  | 				list_del_rcu(&new_stat->list); | ||||||
|  | 				call_rcu(&new_stat->rcu, free_dm_hw_stat); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
| 		break; | 		break; | ||||||
| 	default: | 	default: | ||||||
| 		rc = 1; | 		rc = 1; | ||||||
| 		break; | 		break; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	if (!rc) | ||||||
|  | 		trace_state = state; | ||||||
|  | 
 | ||||||
|  | 	spin_unlock(&trace_state_lock); | ||||||
|  | 
 | ||||||
| 	if (rc) | 	if (rc) | ||||||
| 		return -EINPROGRESS; | 		return -EINPROGRESS; | ||||||
| 	return rc; | 	return rc; | ||||||
|  | @ -204,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb, | ||||||
| 	return -ENOTSUPP; | 	return -ENOTSUPP; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int dropmon_net_event(struct notifier_block *ev_block, | ||||||
|  | 			unsigned long event, void *ptr) | ||||||
|  | { | ||||||
|  | 	struct net_device *dev = ptr; | ||||||
|  | 	struct dm_hw_stat_delta *new_stat = NULL; | ||||||
|  | 	struct dm_hw_stat_delta *tmp; | ||||||
|  | 
 | ||||||
|  | 	switch (event) { | ||||||
|  | 	case NETDEV_REGISTER: | ||||||
|  | 		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); | ||||||
|  | 
 | ||||||
|  | 		if (!new_stat) | ||||||
|  | 			goto out; | ||||||
|  | 
 | ||||||
|  | 		new_stat->dev = dev; | ||||||
|  | 		INIT_RCU_HEAD(&new_stat->rcu); | ||||||
|  | 		spin_lock(&trace_state_lock); | ||||||
|  | 		list_add_rcu(&new_stat->list, &hw_stats_list); | ||||||
|  | 		spin_unlock(&trace_state_lock); | ||||||
|  | 		break; | ||||||
|  | 	case NETDEV_UNREGISTER: | ||||||
|  | 		spin_lock(&trace_state_lock); | ||||||
|  | 		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { | ||||||
|  | 			if (new_stat->dev == dev) { | ||||||
|  | 				new_stat->dev = NULL; | ||||||
|  | 				if (trace_state == TRACE_OFF) { | ||||||
|  | 					list_del_rcu(&new_stat->list); | ||||||
|  | 					call_rcu(&new_stat->rcu, free_dm_hw_stat); | ||||||
|  | 					break; | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		spin_unlock(&trace_state_lock); | ||||||
|  | 		break; | ||||||
|  | 	} | ||||||
|  | out: | ||||||
|  | 	return NOTIFY_DONE; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| static struct genl_ops dropmon_ops[] = { | static struct genl_ops dropmon_ops[] = { | ||||||
| 	{ | 	{ | ||||||
|  | @ -220,6 +325,10 @@ static struct genl_ops dropmon_ops[] = { | ||||||
| 	}, | 	}, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static struct notifier_block dropmon_net_notifier = { | ||||||
|  | 	.notifier_call = dropmon_net_event | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| static int __init init_net_drop_monitor(void) | static int __init init_net_drop_monitor(void) | ||||||
| { | { | ||||||
| 	int cpu; | 	int cpu; | ||||||
|  | @ -243,12 +352,18 @@ static int __init init_net_drop_monitor(void) | ||||||
| 		ret = genl_register_ops(&net_drop_monitor_family, | 		ret = genl_register_ops(&net_drop_monitor_family, | ||||||
| 					&dropmon_ops[i]); | 					&dropmon_ops[i]); | ||||||
| 		if (ret) { | 		if (ret) { | ||||||
| 			printk(KERN_CRIT "failed to register operation %d\n", | 			printk(KERN_CRIT "Failed to register operation %d\n", | ||||||
| 				dropmon_ops[i].cmd); | 				dropmon_ops[i].cmd); | ||||||
| 			goto out_unreg; | 			goto out_unreg; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	rc = register_netdevice_notifier(&dropmon_net_notifier); | ||||||
|  | 	if (rc < 0) { | ||||||
|  | 		printk(KERN_CRIT "Failed to register netdevice notifier\n"); | ||||||
|  | 		goto out_unreg; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	rc = 0; | 	rc = 0; | ||||||
| 
 | 
 | ||||||
| 	for_each_present_cpu(cpu) { | 	for_each_present_cpu(cpu) { | ||||||
|  | @ -259,6 +374,7 @@ static int __init init_net_drop_monitor(void) | ||||||
| 		data->send_timer.data = cpu; | 		data->send_timer.data = cpu; | ||||||
| 		data->send_timer.function = sched_send_work; | 		data->send_timer.function = sched_send_work; | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	goto out; | 	goto out; | ||||||
| 
 | 
 | ||||||
| out_unreg: | out_unreg: | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include <linux/netlink.h> | #include <linux/netlink.h> | ||||||
| #include <linux/net_dropmon.h> | #include <linux/net_dropmon.h> | ||||||
| #include <trace/skb.h> | #include <trace/skb.h> | ||||||
|  | #include <trace/napi.h> | ||||||
| 
 | 
 | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
| #include <asm/bitops.h> | #include <asm/bitops.h> | ||||||
|  | @ -27,3 +28,6 @@ | ||||||
| 
 | 
 | ||||||
| DEFINE_TRACE(kfree_skb); | DEFINE_TRACE(kfree_skb); | ||||||
| EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | ||||||
|  | 
 | ||||||
|  | DEFINE_TRACE(napi_poll); | ||||||
|  | EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); | ||||||
|  |  | ||||||
|  | @ -24,6 +24,7 @@ | ||||||
| #include <net/tcp.h> | #include <net/tcp.h> | ||||||
| #include <net/udp.h> | #include <net/udp.h> | ||||||
| #include <asm/unaligned.h> | #include <asm/unaligned.h> | ||||||
|  | #include <trace/napi.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * We maintain a small pool of fully-sized skbs, to make sure the |  * We maintain a small pool of fully-sized skbs, to make sure the | ||||||
|  | @ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo, | ||||||
| 	set_bit(NAPI_STATE_NPSVC, &napi->state); | 	set_bit(NAPI_STATE_NPSVC, &napi->state); | ||||||
| 
 | 
 | ||||||
| 	work = napi->poll(napi, budget); | 	work = napi->poll(napi, budget); | ||||||
|  | 	trace_napi_poll(napi->dev); | ||||||
| 
 | 
 | ||||||
| 	clear_bit(NAPI_STATE_NPSVC, &napi->state); | 	clear_bit(NAPI_STATE_NPSVC, &napi->state); | ||||||
| 	atomic_dec(&trapped); | 	atomic_dec(&trapped); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Neil Horman
						Neil Horman