forked from mirrors/linux
		
	tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING
This patch exports the sender chronograph stats via the socket SO_TIMESTAMPING channel. Currently we can instrument how long a particular application unit of data was queued in TCP by tracking SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_SCHED. Having these sender chronograph stats exported simultaneously along with these timestamps allows further breaking down the various sender limitations. For example, a video server can tell if a particular chunk of video on a connection takes a long time to deliver because TCP was experiencing a small receive window. It is not possible to tell this before this patch without packet traces. To obtain these stats, the user needs to set the SOF_TIMESTAMPING_OPT_STATS and SOF_TIMESTAMPING_OPT_TSONLY flags while requesting other SOF_TIMESTAMPING TX timestamps. When the timestamps are available in the error queue, the stats are returned in a separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a list of TLVs (struct nlattr) of types: TCP_NLA_BUSY, TCP_NLA_RWND_LIMITED, TCP_NLA_SNDBUF_LIMITED. The unit is microseconds. Signed-off-by: Francis Yan <francisyyan@gmail.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									efd9017416
								
							
						
					
					
						commit
						1c885808e4
					
				
					 20 changed files with 90 additions and 5 deletions
				
			
		|  | @ -182,6 +182,16 @@ SOF_TIMESTAMPING_OPT_TSONLY: | ||||||
|   the timestamp even if sysctl net.core.tstamp_allow_data is 0. |   the timestamp even if sysctl net.core.tstamp_allow_data is 0. | ||||||
|   This option disables SOF_TIMESTAMPING_OPT_CMSG. |   This option disables SOF_TIMESTAMPING_OPT_CMSG. | ||||||
| 
 | 
 | ||||||
|  | SOF_TIMESTAMPING_OPT_STATS: | ||||||
|  | 
 | ||||||
|  |   Optional stats that are obtained along with the transmit timestamps. | ||||||
|  |   It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the | ||||||
|  |   transmit timestamp is available, the stats are available in a | ||||||
|  |   separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a | ||||||
|  |   list of TLVs (struct nlattr) of TCP_NLA_* types. These stats allow the | ||||||
|  |   application to associate various transport layer stats with | ||||||
|  |   the transmit timestamps, such as how long a certain block of | ||||||
|  |   data was limited by the peer's receive window. | ||||||
| 
 | 
 | ||||||
| New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to | New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to | ||||||
| disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate | disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate | ||||||
|  |  | ||||||
|  | @ -97,4 +97,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _UAPI_ASM_SOCKET_H */ | #endif /* _UAPI_ASM_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -90,5 +90,7 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _ASM_SOCKET_H */ | #endif /* _ASM_SOCKET_H */ | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -99,4 +99,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _ASM_IA64_SOCKET_H */ | #endif /* _ASM_IA64_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -90,4 +90,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _ASM_M32R_SOCKET_H */ | #endif /* _ASM_M32R_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -108,4 +108,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _UAPI_ASM_SOCKET_H */ | #endif /* _UAPI_ASM_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -90,4 +90,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _ASM_SOCKET_H */ | #endif /* _ASM_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -89,4 +89,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		0x402E | #define SO_CNX_ADVICE		0x402E | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	0x402F | ||||||
|  | 
 | ||||||
| #endif /* _UAPI_ASM_SOCKET_H */ | #endif /* _UAPI_ASM_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -97,4 +97,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif	/* _ASM_POWERPC_SOCKET_H */ | #endif	/* _ASM_POWERPC_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -96,4 +96,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* _ASM_SOCKET_H */ | #endif /* _ASM_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -86,6 +86,8 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		0x0037 | #define SO_CNX_ADVICE		0x0037 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	0x0038 | ||||||
|  | 
 | ||||||
| /* Security levels - as per NRL IPv6 - don't actually do anything */ | /* Security levels - as per NRL IPv6 - don't actually do anything */ | ||||||
| #define SO_SECURITY_AUTHENTICATION		0x5001 | #define SO_SECURITY_AUTHENTICATION		0x5001 | ||||||
| #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002 | #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002 | ||||||
|  |  | ||||||
|  | @ -101,4 +101,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif	/* _XTENSA_SOCKET_H */ | #endif	/* _XTENSA_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -428,4 +428,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) | ||||||
| 	tp->saved_syn = NULL; | 	tp->saved_syn = NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); | ||||||
|  | 
 | ||||||
| #endif	/* _LINUX_TCP_H */ | #endif	/* _LINUX_TCP_H */ | ||||||
|  |  | ||||||
|  | @ -92,4 +92,6 @@ | ||||||
| 
 | 
 | ||||||
| #define SO_CNX_ADVICE		53 | #define SO_CNX_ADVICE		53 | ||||||
| 
 | 
 | ||||||
|  | #define SCM_TIMESTAMPING_OPT_STATS	54 | ||||||
|  | 
 | ||||||
| #endif /* __ASM_GENERIC_SOCKET_H */ | #endif /* __ASM_GENERIC_SOCKET_H */ | ||||||
|  |  | ||||||
|  | @ -25,8 +25,9 @@ enum { | ||||||
| 	SOF_TIMESTAMPING_TX_ACK = (1<<9), | 	SOF_TIMESTAMPING_TX_ACK = (1<<9), | ||||||
| 	SOF_TIMESTAMPING_OPT_CMSG = (1<<10), | 	SOF_TIMESTAMPING_OPT_CMSG = (1<<10), | ||||||
| 	SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), | 	SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), | ||||||
|  | 	SOF_TIMESTAMPING_OPT_STATS = (1<<12), | ||||||
| 
 | 
 | ||||||
| 	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, | 	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS, | ||||||
| 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | | 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | | ||||||
| 				 SOF_TIMESTAMPING_LAST | 				 SOF_TIMESTAMPING_LAST | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -220,6 +220,14 @@ struct tcp_info { | ||||||
| 	__u64	tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ | 	__u64	tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ | ||||||
|  | enum { | ||||||
|  | 	TCP_NLA_PAD, | ||||||
|  | 	TCP_NLA_BUSY,		/* Time (usec) busy sending data */ | ||||||
|  | 	TCP_NLA_RWND_LIMITED,	/* Time (usec) limited by receive window */ | ||||||
|  | 	TCP_NLA_SNDBUF_LIMITED,	/* Time (usec) limited by send buffer */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /* for TCP_MD5SIG socket option */ | /* for TCP_MD5SIG socket option */ | ||||||
| #define TCP_MD5SIG_MAXKEYLEN	80 | #define TCP_MD5SIG_MAXKEYLEN	80 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3839,10 +3839,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, | ||||||
| 	if (!skb_may_tx_timestamp(sk, tsonly)) | 	if (!skb_may_tx_timestamp(sk, tsonly)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	if (tsonly) | 	if (tsonly) { | ||||||
| 		skb = alloc_skb(0, GFP_ATOMIC); | #ifdef CONFIG_INET | ||||||
| 	else | 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && | ||||||
|  | 		    sk->sk_protocol == IPPROTO_TCP && | ||||||
|  | 		    sk->sk_type == SOCK_STREAM) | ||||||
|  | 			skb = tcp_get_timestamping_opt_stats(sk); | ||||||
|  | 		else | ||||||
|  | #endif | ||||||
|  | 			skb = alloc_skb(0, GFP_ATOMIC); | ||||||
|  | 	} else { | ||||||
| 		skb = skb_clone(orig_skb, GFP_ATOMIC); | 		skb = skb_clone(orig_skb, GFP_ATOMIC); | ||||||
|  | 	} | ||||||
| 	if (!skb) | 	if (!skb) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -854,6 +854,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname, | ||||||
| 				sk->sk_tskey = 0; | 				sk->sk_tskey = 0; | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  | 
 | ||||||
|  | 		if (val & SOF_TIMESTAMPING_OPT_STATS && | ||||||
|  | 		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) { | ||||||
|  | 			ret = -EINVAL; | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		sk->sk_tsflags = val; | 		sk->sk_tsflags = val; | ||||||
| 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE) | 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE) | ||||||
| 			sock_enable_timestamp(sk, | 			sock_enable_timestamp(sk, | ||||||
|  |  | ||||||
|  | @ -2841,6 +2841,26 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(tcp_get_info); | EXPORT_SYMBOL_GPL(tcp_get_info); | ||||||
| 
 | 
 | ||||||
|  | struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) | ||||||
|  | { | ||||||
|  | 	const struct tcp_sock *tp = tcp_sk(sk); | ||||||
|  | 	struct sk_buff *stats; | ||||||
|  | 	struct tcp_info info; | ||||||
|  | 
 | ||||||
|  | 	stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); | ||||||
|  | 	if (!stats) | ||||||
|  | 		return NULL; | ||||||
|  | 
 | ||||||
|  | 	tcp_get_info_chrono_stats(tp, &info); | ||||||
|  | 	nla_put_u64_64bit(stats, TCP_NLA_BUSY, | ||||||
|  | 			  info.tcpi_busy_time, TCP_NLA_PAD); | ||||||
|  | 	nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED, | ||||||
|  | 			  info.tcpi_rwnd_limited, TCP_NLA_PAD); | ||||||
|  | 	nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, | ||||||
|  | 			  info.tcpi_sndbuf_limited, TCP_NLA_PAD); | ||||||
|  | 	return stats; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int do_tcp_getsockopt(struct sock *sk, int level, | static int do_tcp_getsockopt(struct sock *sk, int level, | ||||||
| 		int optname, char __user *optval, int __user *optlen) | 		int optname, char __user *optval, int __user *optlen) | ||||||
| { | { | ||||||
|  |  | ||||||
|  | @ -693,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, | ||||||
| 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && | 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && | ||||||
| 	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) | 	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) | ||||||
| 		empty = 0; | 		empty = 0; | ||||||
| 	if (!empty) | 	if (!empty) { | ||||||
| 		put_cmsg(msg, SOL_SOCKET, | 		put_cmsg(msg, SOL_SOCKET, | ||||||
| 			 SCM_TIMESTAMPING, sizeof(tss), &tss); | 			 SCM_TIMESTAMPING, sizeof(tss), &tss); | ||||||
|  | 
 | ||||||
|  | 		if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) | ||||||
|  | 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, | ||||||
|  | 				 skb->len, skb->data); | ||||||
|  | 	} | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(__sock_recv_timestamp); | EXPORT_SYMBOL_GPL(__sock_recv_timestamp); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Francis Yan
						Francis Yan