forked from mirrors/linux
		
	af_packet: add interframe drop cmsg (v6)
Add Ancillary data to better represent loss information. I've had a few requests recently to provide more detail regarding frame loss during an AF_PACKET packet capture session. Specifically the requestors want to see where in a packet sequence frames were lost, i.e. they want to see that 40 frames were lost between frames 302 and 303 in a packet capture file. In order to do this we need: 1) The kernel to export this data to user space 2) The applications to make use of it This patch addresses item (1). It does this by doing the following: A) Anytime we drop a frame for which we would increment po->stats.tp_drops, we also now increment a stat called po->stats.tp_gap. B) Every time we successfully enqueue a frame to sk_receive_queue, we record the value of po->stats.tp_gap in skb->mark. skb->cb would nominally be the place to record this, but since all the space there is used up, we're overloading skb->mark. It's safe to do since any enqueued packet is guaranteed to be unshared at this point, and skb->mark isn't used for anything else in the rx path to the application. After we record tp_gap in the skb, we zero po->stats.tp_gap. This allows us to keep a counter of the number of frames lost between any two enqueued packets. C) When the application goes to dequeue a frame from the packet socket, we look at skb->mark for that frame. If it is non-zero, we add a cmsg chunk to the msghdr of level SOL_PACKET and type PACKET_GAPDATA. It's a 32-bit integer that represents the number of frames lost between this packet and the previous frame received. Note there is a chance that if there is frame loss after a receive, and then the socket is closed, some gap data might be lost. This is covered by the use of the PACKET_AUXDATA socket option, which gives total loss data. With a bit of math, the final gap can be determined that way. I've tested this patch myself, and it works well. 
Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> include/linux/if_packet.h | 2 ++ net/packet/af_packet.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									69ef969409
								
							
						
					
					
						commit
						977750076d
					
				
					 2 changed files with 35 additions and 0 deletions
				
			
		| 
						 | 
					@ -48,11 +48,13 @@ struct sockaddr_ll
 | 
				
			||||||
#define PACKET_RESERVE			12
 | 
					#define PACKET_RESERVE			12
 | 
				
			||||||
#define PACKET_TX_RING			13
 | 
					#define PACKET_TX_RING			13
 | 
				
			||||||
#define PACKET_LOSS			14
 | 
					#define PACKET_LOSS			14
 | 
				
			||||||
 | 
					#define PACKET_GAPDATA			15
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct tpacket_stats
 | 
					struct tpacket_stats
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned int	tp_packets;
 | 
						unsigned int	tp_packets;
 | 
				
			||||||
	unsigned int	tp_drops;
 | 
						unsigned int	tp_drops;
 | 
				
			||||||
 | 
						unsigned int    tp_gap;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct tpacket_auxdata
 | 
					struct tpacket_auxdata
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -523,6 +523,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 | 
				
			||||||
	return res;
 | 
						return res;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * If we've lost frames since the last time we queued one to the
 | 
				
			||||||
 | 
					 * sk_receive_queue, we need to record it here.
 | 
				
			||||||
 | 
					 * This must be called under the protection of the socket lock
 | 
				
			||||||
 | 
					 * to prevent racing with other softirqs and user space
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline void record_packet_gap(struct sk_buff *skb,
 | 
				
			||||||
 | 
										struct packet_sock *po)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We overload the mark field here, since we're about
 | 
				
			||||||
 | 
						 * to enqueue to a receive queue and nobody else will
 | 
				
			||||||
 | 
						 * use this field at this point
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						skb->mark = po->stats.tp_gap;
 | 
				
			||||||
 | 
						po->stats.tp_gap = 0;
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline __u32 check_packet_gap(struct sk_buff *skb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return skb->mark;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
   This function makes lazy skb cloning in hope that most of packets
 | 
					   This function makes lazy skb cloning in hope that most of packets
 | 
				
			||||||
   are discarded by BPF.
 | 
					   are discarded by BPF.
 | 
				
			||||||
| 
						 | 
					@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&sk->sk_receive_queue.lock);
 | 
						spin_lock(&sk->sk_receive_queue.lock);
 | 
				
			||||||
	po->stats.tp_packets++;
 | 
						po->stats.tp_packets++;
 | 
				
			||||||
 | 
						record_packet_gap(skb, po);
 | 
				
			||||||
	__skb_queue_tail(&sk->sk_receive_queue, skb);
 | 
						__skb_queue_tail(&sk->sk_receive_queue, skb);
 | 
				
			||||||
	spin_unlock(&sk->sk_receive_queue.lock);
 | 
						spin_unlock(&sk->sk_receive_queue.lock);
 | 
				
			||||||
	sk->sk_data_ready(sk, skb->len);
 | 
						sk->sk_data_ready(sk, skb->len);
 | 
				
			||||||
| 
						 | 
					@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 | 
				
			||||||
drop_n_acct:
 | 
					drop_n_acct:
 | 
				
			||||||
	spin_lock(&sk->sk_receive_queue.lock);
 | 
						spin_lock(&sk->sk_receive_queue.lock);
 | 
				
			||||||
	po->stats.tp_drops++;
 | 
						po->stats.tp_drops++;
 | 
				
			||||||
 | 
						po->stats.tp_gap++;
 | 
				
			||||||
	spin_unlock(&sk->sk_receive_queue.lock);
 | 
						spin_unlock(&sk->sk_receive_queue.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
drop_n_restore:
 | 
					drop_n_restore:
 | 
				
			||||||
| 
						 | 
					@ -811,6 +838,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ring_is_full:
 | 
					ring_is_full:
 | 
				
			||||||
	po->stats.tp_drops++;
 | 
						po->stats.tp_drops++;
 | 
				
			||||||
 | 
						po->stats.tp_gap++;
 | 
				
			||||||
	spin_unlock(&sk->sk_receive_queue.lock);
 | 
						spin_unlock(&sk->sk_receive_queue.lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sk->sk_data_ready(sk, 0);
 | 
						sk->sk_data_ready(sk, 0);
 | 
				
			||||||
| 
						 | 
					@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 | 
				
			||||||
	struct sk_buff *skb;
 | 
						struct sk_buff *skb;
 | 
				
			||||||
	int copied, err;
 | 
						int copied, err;
 | 
				
			||||||
	struct sockaddr_ll *sll;
 | 
						struct sockaddr_ll *sll;
 | 
				
			||||||
 | 
						__u32 gap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	err = -EINVAL;
 | 
						err = -EINVAL;
 | 
				
			||||||
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
 | 
						if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
 | 
				
			||||||
| 
						 | 
					@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 | 
				
			||||||
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 | 
							put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						gap = check_packet_gap(skb);
 | 
				
			||||||
 | 
						if (gap)
 | 
				
			||||||
 | 
							put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 *	Free or return the buffer as appropriate. Again this
 | 
						 *	Free or return the buffer as appropriate. Again this
 | 
				
			||||||
	 *	hides all the races and re-entrancy issues from us.
 | 
						 *	hides all the races and re-entrancy issues from us.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue