forked from mirrors/linux
		
	bpf: Add support for changing congestion control
Add support for changing congestion control for SOCK_OPS bpf programs through the setsockopt bpf helper function. Also add a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, which is needed for congestion controls, like dctcp, that need to enable ECN in the SYN packets. Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									d9925368a6
								
							
						
					
					
						commit
						91b5b21c7c
					
				
					 7 changed files with 58 additions and 17 deletions
				
			
		|  | @ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name); | |||
| void tcp_get_available_congestion_control(char *buf, size_t len); | ||||
| void tcp_get_allowed_congestion_control(char *buf, size_t len); | ||||
| int tcp_set_allowed_congestion_control(char *allowed); | ||||
| int tcp_set_congestion_control(struct sock *sk, const char *name); | ||||
| int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); | ||||
| void tcp_reinit_congestion_control(struct sock *sk, | ||||
| 				   const struct tcp_congestion_ops *ca); | ||||
| u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); | ||||
| void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); | ||||
| 
 | ||||
|  | @ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk) | |||
| 		rwnd = 0; | ||||
| 	return rwnd; | ||||
| } | ||||
| 
 | ||||
| static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) | ||||
| { | ||||
| 	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); | ||||
| } | ||||
| #endif	/* _TCP_H */ | ||||
|  |  | |||
|  | @ -778,6 +778,9 @@ enum { | |||
| 						 * passive connection is | ||||
| 						 * established | ||||
| 						 */ | ||||
| 	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
 | ||||
| 					 * needs ECN | ||||
| 					 */ | ||||
| }; | ||||
| 
 | ||||
| #endif /* _UAPI__LINUX_BPF_H__ */ | ||||
|  |  | |||
|  | @ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, | |||
| 		} | ||||
| 	} else if (level == SOL_TCP && | ||||
| 		   sk->sk_prot->setsockopt == tcp_setsockopt) { | ||||
| 		/* Place holder */ | ||||
| #ifdef CONFIG_INET | ||||
| 		if (optname == TCP_CONGESTION) { | ||||
| 			char name[TCP_CA_NAME_MAX]; | ||||
| 
 | ||||
| 			strncpy(name, optval, min_t(long, optlen, | ||||
| 						    TCP_CA_NAME_MAX-1)); | ||||
| 			name[TCP_CA_NAME_MAX-1] = 0; | ||||
| 			ret = tcp_set_congestion_control(sk, name, false); | ||||
| 			if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) | ||||
| 				/* replacing an existing ca */ | ||||
| 				tcp_reinit_congestion_control(sk, | ||||
| 					inet_csk(sk)->icsk_ca_ops); | ||||
| 		} else { | ||||
| 			ret = -EINVAL; | ||||
| 		} | ||||
| #else | ||||
| 		ret = -EINVAL; | ||||
| #endif | ||||
| 	} else { | ||||
| 		ret = -EINVAL; | ||||
| 	} | ||||
|  |  | |||
|  | @ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 		name[val] = 0; | ||||
| 
 | ||||
| 		lock_sock(sk); | ||||
| 		err = tcp_set_congestion_control(sk, name); | ||||
| 		err = tcp_set_congestion_control(sk, name, true); | ||||
| 		release_sock(sk); | ||||
| 		return err; | ||||
| 	} | ||||
|  |  | |||
|  | @ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk) | |||
| 		INET_ECN_dontxmit(sk); | ||||
| } | ||||
| 
 | ||||
| static void tcp_reinit_congestion_control(struct sock *sk, | ||||
| 					  const struct tcp_congestion_ops *ca) | ||||
| void tcp_reinit_congestion_control(struct sock *sk, | ||||
| 				   const struct tcp_congestion_ops *ca) | ||||
| { | ||||
| 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||
| 
 | ||||
|  | @ -333,8 +333,12 @@ int tcp_set_allowed_congestion_control(char *val) | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /* Change congestion control for socket */ | ||||
| int tcp_set_congestion_control(struct sock *sk, const char *name) | ||||
| /* Change congestion control for socket. If load is false, then it is the
 | ||||
|  * responsibility of the caller to call tcp_init_congestion_control or | ||||
|  * tcp_reinit_congestion_control (if the current congestion control was | ||||
|  * already initialized). | ||||
|  */ | ||||
| int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) | ||||
| { | ||||
| 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||
| 	const struct tcp_congestion_ops *ca; | ||||
|  | @ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
| 		return -EPERM; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	ca = __tcp_ca_find_autoload(name); | ||||
| 	if (!load) | ||||
| 		ca = tcp_ca_find(name); | ||||
| 	else | ||||
| 		ca = __tcp_ca_find_autoload(name); | ||||
| 	/* No change asking for existing value */ | ||||
| 	if (ca == icsk->icsk_ca_ops) { | ||||
| 		icsk->icsk_ca_setsockopt = 1; | ||||
| 		goto out; | ||||
| 	} | ||||
| 	if (!ca) | ||||
| 	if (!ca) { | ||||
| 		err = -ENOENT; | ||||
| 	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || | ||||
| 		   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) | ||||
| 	} else if (!load) { | ||||
| 		icsk->icsk_ca_ops = ca; | ||||
| 		if (!try_module_get(ca->owner)) | ||||
| 			err = -EBUSY; | ||||
| 	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || | ||||
| 		     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { | ||||
| 		err = -EPERM; | ||||
| 	else if (!try_module_get(ca->owner)) | ||||
| 	} else if (!try_module_get(ca->owner)) { | ||||
| 		err = -EBUSY; | ||||
| 	else | ||||
| 	} else { | ||||
| 		tcp_reinit_congestion_control(sk, ca); | ||||
| 	} | ||||
|  out: | ||||
| 	rcu_read_unlock(); | ||||
| 	return err; | ||||
|  |  | |||
|  | @ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req, | |||
| 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; | ||||
| 
 | ||||
| 	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || | ||||
| 	    (ecn_ok_dst & DST_FEATURE_ECN_CA)) | ||||
| 	    (ecn_ok_dst & DST_FEATURE_ECN_CA) || | ||||
| 	    tcp_bpf_ca_needs_ecn((struct sock *)req)) | ||||
| 		inet_rsk(req)->ecn_ok = 1; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) | |||
| 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; | ||||
| 	if (!(tp->ecn_flags & TCP_ECN_OK)) | ||||
| 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; | ||||
| 	else if (tcp_ca_needs_ecn(sk)) | ||||
| 	else if (tcp_ca_needs_ecn(sk) || | ||||
| 		 tcp_bpf_ca_needs_ecn(sk)) | ||||
| 		INET_ECN_xmit(sk); | ||||
| } | ||||
| 
 | ||||
|  | @ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) | |||
| static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) | ||||
| { | ||||
| 	struct tcp_sock *tp = tcp_sk(sk); | ||||
| 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); | ||||
| 	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || | ||||
| 		       tcp_ca_needs_ecn(sk); | ||||
| 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn; | ||||
| 
 | ||||
| 	if (!use_ecn) { | ||||
| 		const struct dst_entry *dst = __sk_dst_get(sk); | ||||
|  | @ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) | |||
| 	if (use_ecn) { | ||||
| 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; | ||||
| 		tp->ecn_flags = TCP_ECN_OK; | ||||
| 		if (tcp_ca_needs_ecn(sk)) | ||||
| 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) | ||||
| 			INET_ECN_xmit(sk); | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Lawrence Brakmo
						Lawrence Brakmo