Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
bpf: Add support for changing congestion control

Added support for changing congestion control for SOCK_OPS bpf programs
through the setsockopt bpf helper function. It also adds a new SOCK_OPS
op, BPF_SOCK_OPS_NEEDS_ECN, that is needed for congestion controls, like
dctcp, that need to enable ECN in the SYN packets.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

parent d9925368a6
commit 91b5b21c7c

7 changed files with 58 additions and 17 deletions
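For context, here is a minimal sketch of how a SOCK_OPS program could use the new path. It is not part of this commit: the includes, the samples-style bpf_helpers.h declarations, the section and function names, and the choice of dctcp are illustrative assumptions. The program answers BPF_SOCK_OPS_NEEDS_ECN so ECN is negotiated in the SYN, and switches established connections to dctcp through the bpf_setsockopt() helper, which lands in tcp_set_congestion_control() with load == false via the net/core/filter.c hunk below.

/* Illustrative SOCK_OPS sketch, not taken from this commit. */
#include <uapi/linux/bpf.h>
#include <uapi/linux/tcp.h>		/* TCP_CONGESTION */
#include "bpf_helpers.h"		/* SEC(), bpf_setsockopt() (samples-style, assumed) */

#define SOL_TCP 6			/* defined locally for the BPF build */

SEC("sockops")
int bpf_set_cong(struct bpf_sock_ops *skops)
{
	char cong[] = "dctcp";
	int rv = 0;

	switch (skops->op) {
	case BPF_SOCK_OPS_NEEDS_ECN:
		/* A reply of 1 makes tcp_bpf_ca_needs_ecn() return true,
		 * so ECN is requested in the SYN/SYN-ACK.
		 */
		rv = 1;
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* Routed to tcp_set_congestion_control(sk, "dctcp", false)
		 * by the net/core/filter.c change below.
		 */
		rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
				    cong, sizeof(cong));
		break;
	default:
		rv = -1;
	}
	skops->reply = rv;
	return 1;
}

char _license[] SEC("license") = "GPL";

Note the load == false contract spelled out in the tcp_cong.c comment below: the caller is responsible for initializing (or reinitializing) the congestion control, which the filter.c change only does for ops greater than BPF_SOCK_OPS_NEEDS_ECN, i.e. when an already-initialized ca is being replaced.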
				
			
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
 void tcp_get_available_congestion_control(char *buf, size_t len);
 void tcp_get_allowed_congestion_control(char *buf, size_t len);
 int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+				   const struct tcp_congestion_ops *ca);
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
@@ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
 		rwnd = 0;
 	return rwnd;
 }
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+}
 #endif	/* _TCP_H */
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -778,6 +778,9 @@ enum {
 						 * passive connection is
 						 * established
 						 */
+	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
+					 * needs ECN
+					 */
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 		}
 	} else if (level == SOL_TCP &&
 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
-		/* Place holder */
+#ifdef CONFIG_INET
+		if (optname == TCP_CONGESTION) {
+			char name[TCP_CA_NAME_MAX];
+
+			strncpy(name, optval, min_t(long, optlen,
+						    TCP_CA_NAME_MAX-1));
+			name[TCP_CA_NAME_MAX-1] = 0;
+			ret = tcp_set_congestion_control(sk, name, false);
+			if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+				/* replacing an existing ca */
+				tcp_reinit_congestion_control(sk,
+					inet_csk(sk)->icsk_ca_ops);
+		} else {
+			ret = -EINVAL;
+		}
+#else
 		ret = -EINVAL;
+#endif
 	} else {
 		ret = -EINVAL;
 	}
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		name[val] = 0;
 
 		lock_sock(sk);
-		err = tcp_set_congestion_control(sk, name);
+		err = tcp_set_congestion_control(sk, name, true);
 		release_sock(sk);
 		return err;
 	}
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,7 +189,7 @@ void tcp_init_congestion_control(struct sock *sk)
 		INET_ECN_dontxmit(sk);
 }
 
-static void tcp_reinit_congestion_control(struct sock *sk,
+void tcp_reinit_congestion_control(struct sock *sk,
 				   const struct tcp_congestion_ops *ca)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -333,8 +333,12 @@ int tcp_set_allowed_congestion_control(char *val)
 	return ret;
 }
 
-/* Change congestion control for socket */
-int tcp_set_congestion_control(struct sock *sk, const char *name)
+/* Change congestion control for socket. If load is false, then it is the
+ * responsibility of the caller to call tcp_init_congestion_control or
+ * tcp_reinit_congestion_control (if the current congestion control was
+ * already initialized.
+ */
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 		return -EPERM;
 
 	rcu_read_lock();
-	ca = __tcp_ca_find_autoload(name);
+	if (!load)
+		ca = tcp_ca_find(name);
+	else
+		ca = __tcp_ca_find_autoload(name);
 	/* No change asking for existing value */
 	if (ca == icsk->icsk_ca_ops) {
 		icsk->icsk_ca_setsockopt = 1;
 		goto out;
 	}
-	if (!ca)
+	if (!ca) {
 		err = -ENOENT;
-	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
-		   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
-		err = -EPERM;
-	else if (!try_module_get(ca->owner))
+	} else if (!load) {
+		icsk->icsk_ca_ops = ca;
+		if (!try_module_get(ca->owner))
+			err = -EBUSY;
+	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
+		     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
+		err = -EPERM;
+	} else if (!try_module_get(ca->owner)) {
 		err = -EBUSY;
-	else
+	} else {
 		tcp_reinit_congestion_control(sk, ca);
+	}
  out:
 	rcu_read_unlock();
 	return err;
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
 
 	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
-	    (ecn_ok_dst & DST_FEATURE_ECN_CA))
+	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
+	    tcp_bpf_ca_needs_ecn((struct sock *)req))
 		inet_rsk(req)->ecn_ok = 1;
 }
 
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
-	else if (tcp_ca_needs_ecn(sk))
+	else if (tcp_ca_needs_ecn(sk) ||
+		 tcp_bpf_ca_needs_ecn(sk))
 		INET_ECN_xmit(sk);
 }
 
@@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
 static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
 	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
-		       tcp_ca_needs_ecn(sk);
+		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
 
 	if (!use_ecn) {
 		const struct dst_entry *dst = __sk_dst_get(sk);
@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 	if (use_ecn) {
 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
-		if (tcp_ca_needs_ecn(sk))
+		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
 			INET_ECN_xmit(sk);
 	}
 }