	net: sk_add_backlog() take rmem_alloc into account
The current socket backlog limit is not enough to really stop DDoS attacks, because the user thread spends a lot of time processing a full backlog each round and can end up spinning madly on the socket lock.

We should add the backlog size and the receive queue size (aka rmem_alloc) together to pace writers, and let the user thread run without being slowed down too much.

Introduce a sk_rcvqueues_full() helper to avoid taking the socket lock in stress situations.

Under huge stress from a multiqueue/RPS-enabled NIC, a single-flow UDP receiver can now process ~200,000 pps (instead of ~100 pps before the patch) on an 8-core machine.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e7676c1a7
commit c377411f24

5 changed files with 27 additions and 6 deletions
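To make the change concrete before the diffs, here is a minimal userspace sketch (not kernel code, and not part of the patch) contrasting the old backlog-only limit with the new combined check; the helper names and the example sizes are invented for illustration.

/*
 * Illustration only: the old limit looked at the backlog alone (capped at
 * roughly 2 * sk_rcvbuf), while the new sk_rcvqueues_full() logic also
 * counts what is already sitting in the receive queue (sk_rmem_alloc).
 */
#include <stdbool.h>
#include <stdio.h>

/* Old rule: drop once the backlog alone reaches max(limit, 2 * rcvbuf). */
static bool old_would_drop(unsigned int backlog_len, unsigned int rcvbuf)
{
	unsigned int limit = rcvbuf << 1;	/* sock_init_data() used this default */

	return backlog_len >= limit;
}

/* New rule: backlog + receive queue + incoming skb->truesize vs rcvbuf. */
static bool new_would_drop(unsigned int backlog_len, unsigned int rmem_alloc,
			   unsigned int truesize, unsigned int rcvbuf)
{
	unsigned int qsize = backlog_len + rmem_alloc;

	return qsize + truesize > rcvbuf;
}

int main(void)
{
	/* Invented example numbers, in bytes. */
	unsigned int rcvbuf = 131072;	/* sk_rcvbuf */
	unsigned int backlog = 65536;	/* bytes queued on the backlog */
	unsigned int rmem = 131072;	/* bytes already in sk_receive_queue */
	unsigned int truesize = 2048;	/* incoming packet */

	/* Old check: accepts, even though backlog + receive queue already exceed rcvbuf. */
	printf("old rule: %s\n", old_would_drop(backlog, rcvbuf) ? "drop" : "accept");
	/* New check: drops early, pacing the writers. */
	printf("new rule: %s\n", new_would_drop(backlog, rmem, truesize, rcvbuf) ? "drop" : "accept");
	return 0;
}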
				
			
include/net/sock.h
@@ -256,7 +256,6 @@ struct sock {
 		struct sk_buff *head;
 		struct sk_buff *tail;
 		int len;
-		int limit;
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
@@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
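One design note on the helper above: sk_rcvqueues_full() is meant to be cheap enough to call before bh_lock_sock(), so it reads sk_backlog.len and sk_rmem_alloc without holding the socket lock; a slightly stale answer only means a packet is accepted or dropped marginally early, while sk_add_backlog() still enforces the bound under the per-socket spinlock. The hunks below apply exactly that pattern in the generic and UDP receive paths.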
net/core/sock.c
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
-	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
net/ipv4/udp.c
@@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
+
+	if (sk_rcvqueues_full(sk, skb))
+		goto drop;
+
 	rc = 0;
 
 	bh_lock_sock(sk);
net/ipv6/udp.c
@@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
 
 		sk = stack[i];
 		if (skb1) {
+			if (sk_rcvqueues_full(sk, skb)) {
+				kfree_skb(skb1);
+				goto drop;
+			}
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
@@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
 	/* deliver */
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		sock_put(sk);
+		goto discard;
+	}
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
net/sctp/socket.c
@@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 	percpu_counter_inc(&sctp_sockets_allocated);
 
-	/* Set socket backlog limit. */
-	sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	local_bh_enable();
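With sk_backlog.limit removed from struct sock, the SCTP-specific cap above becomes dead weight: SCTP's backlog is now bounded by the same sk_rcvbuf-based check in sk_add_backlog() as every other protocol.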