forked from mirrors/linux
		
	bpf: add BPF_CGROUP_SOCK_OPS callback that is executed on every RTT
Performance impact should be minimal because it's under a new
BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled.

Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
		
							parent
							
								
									d2f5bbbc35
								
							
						
					
					
						commit
						23729ff231
					
				
					 3 changed files with 17 additions and 1 deletion
				
			
		| 
						 | 
					@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 | 
				
			||||||
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 | 
						return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void tcp_bpf_rtt(struct sock *sk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct tcp_sock *tp = tcp_sk(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
 | 
				
			||||||
 | 
							tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if IS_ENABLED(CONFIG_SMC)
 | 
					#if IS_ENABLED(CONFIG_SMC)
 | 
				
			||||||
extern struct static_key_false tcp_have_smc;
 | 
					extern struct static_key_false tcp_have_smc;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1770,6 +1770,7 @@ union bpf_attr {
 | 
				
			||||||
 * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
 | 
					 * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
 | 
				
			||||||
 * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
 | 
					 * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
 | 
				
			||||||
 * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
 | 
					 * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
 | 
				
			||||||
 | 
					 * 		* **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * 		Therefore, this function can be used to clear a callback flag by
 | 
					 * 		Therefore, this function can be used to clear a callback flag by
 | 
				
			||||||
 * 		setting the appropriate bit to zero. e.g. to disable the RTO
 | 
					 * 		setting the appropriate bit to zero. e.g. to disable the RTO
 | 
				
			||||||
| 
						 | 
					@ -3314,7 +3315,8 @@ struct bpf_sock_ops {
 | 
				
			||||||
#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 | 
					#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 | 
				
			||||||
#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 | 
					#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 | 
				
			||||||
#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
 | 
					#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
 | 
				
			||||||
#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
 | 
					#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
 | 
				
			||||||
 | 
					#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF		/* Mask of all currently
 | 
				
			||||||
							 * supported cb flags
 | 
												 * supported cb flags
 | 
				
			||||||
							 */
 | 
												 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3369,6 +3371,8 @@ enum {
 | 
				
			||||||
	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
 | 
						BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
 | 
				
			||||||
					 * socket transition to LISTEN state.
 | 
										 * socket transition to LISTEN state.
 | 
				
			||||||
					 */
 | 
										 */
 | 
				
			||||||
 | 
						BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
 | 
				
			||||||
 | 
										 */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
 | 
					/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 | 
				
			||||||
				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 | 
									tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 | 
				
			||||||
			tp->rtt_seq = tp->snd_nxt;
 | 
								tp->rtt_seq = tp->snd_nxt;
 | 
				
			||||||
			tp->mdev_max_us = tcp_rto_min_us(sk);
 | 
								tp->mdev_max_us = tcp_rto_min_us(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								tcp_bpf_rtt(sk);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		/* no previous measure. */
 | 
							/* no previous measure. */
 | 
				
			||||||
| 
						 | 
					@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 | 
				
			||||||
		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
 | 
							tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
 | 
				
			||||||
		tp->mdev_max_us = tp->rttvar_us;
 | 
							tp->mdev_max_us = tp->rttvar_us;
 | 
				
			||||||
		tp->rtt_seq = tp->snd_nxt;
 | 
							tp->rtt_seq = tp->snd_nxt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							tcp_bpf_rtt(sk);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	tp->srtt_us = max(1U, srtt);
 | 
						tp->srtt_us = max(1U, srtt);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue