	tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress redirection
When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will
be created out of the skb, and the rmem accounting of the sk_msg will be
handled by the skb.

For skmsgs in the __SK_REDIRECT case of tcp_bpf_send_verdict, when
redirecting to the ingress of a socket, although we call sk_rmem_schedule
and add the sk_msg to the ingress_msg of sk_redir, we do not update
sk_rmem_alloc. As a result, aside from the global memory limit, the rmem
of sk_redir is nearly unlimited. Thus, add sk_rmem_alloc related logic to
limit the receive buffer.

Since the functions sk_msg_recvmsg and __sk_psock_purge_ingress_msg are
used in both paths, we use "msg->skb" to test whether the sk_msg is
skb-backed; if it is not, we do the memory accounting explicitly.
Fixes: 604326b41a ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com
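
For illustration, here is a minimal userspace sketch of the accounting
invariant this patch enforces: every byte queued to a socket's ingress
list as a non-skb-backed sk_msg is added to an rmem counter, and
subtracted again when the data is received or the queue is purged. The
names (rmem_alloc, charge_ingress, uncharge, struct msg) are illustrative
stand-ins, not kernel API.

/* Sketch only: models the charge/uncharge pairing, not the kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int rmem_alloc;          /* models sk->sk_rmem_alloc */

struct msg {
	int size;                      /* bytes held by this message */
	bool skb_backed;               /* models msg->skb != NULL */
};

/* Models bpf_tcp_ingress() charging the redirect target: only
 * non-skb-backed messages need explicit accounting here, since
 * skb-backed ones are already accounted via the skb itself. */
static void charge_ingress(struct msg *m)
{
	if (!m->skb_backed)
		atomic_fetch_add(&rmem_alloc, m->size);
}

/* Models sk_msg_recvmsg()/__sk_psock_purge_ingress_msg() releasing
 * the charge when data leaves the ingress queue. */
static void uncharge(struct msg *m)
{
	if (!m->skb_backed)
		atomic_fetch_sub(&rmem_alloc, m->size);
}

int main(void)
{
	struct msg m = { .size = 4096, .skb_backed = false };

	charge_ingress(&m);
	printf("after charge:   %d\n", atomic_load(&rmem_alloc)); /* 4096 */
	uncharge(&m);
	printf("after uncharge: %d\n", atomic_load(&rmem_alloc)); /* 0 */
	return 0;
}

If charge and uncharge are not paired like this, the counter drifts
upward and the socket's receive buffer stops limiting queued data, which
is the unbounded-rmem problem the patch closes.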
			
			
parent 54f89b3178
commit d888b7af7c

3 changed files with 16 additions and 5 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -317,17 +317,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
-static inline void sk_psock_queue_msg(struct sk_psock *psock,
+static inline bool sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
+	bool ret;
+
 	spin_lock_bh(&psock->ingress_lock);
-	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 		list_add_tail(&msg->list, &psock->ingress_msg);
-	else {
+		ret = true;
+	} else {
 		sk_msg_free(psock->sk, msg);
 		kfree(msg);
+		ret = false;
 	}
 	spin_unlock_bh(&psock->ingress_lock);
+	return ret;
 }
 
 static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 			if (likely(!peek)) {
 				sge->offset += copy;
 				sge->length -= copy;
-				if (!msg_rx->skb)
+				if (!msg_rx->skb) {
 					sk_mem_uncharge(sk, copy);
+					atomic_sub(copy, &sk->sk_rmem_alloc);
+				}
 				msg_rx->sg.size -= copy;
 
 				if (!sge->length) {
@@ -772,6 +774,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
 	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
 		list_del(&msg->list);
+		if (!msg->skb)
+			atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
 		sk_msg_free(psock->sk, msg);
 		kfree(msg);
 	}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -56,6 +56,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 		}
 
 		sk_mem_charge(sk, size);
+		atomic_add(size, &sk->sk_rmem_alloc);
 		sk_msg_xfer(tmp, msg, i, size);
 		copied += size;
 		if (sge->length)
@@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 
 	if (!ret) {
 		msg->sg.start = i;
-		sk_psock_queue_msg(psock, tmp);
+		if (!sk_psock_queue_msg(psock, tmp))
+			atomic_sub(copied, &sk->sk_rmem_alloc);
 		sk_psock_data_ready(sk, psock);
 	} else {
 		sk_msg_free(sk, tmp);
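
Note the ordering in bpf_tcp_ingress(): the caller charges sk_rmem_alloc
before calling sk_psock_queue_msg(), which is why that helper now returns
a bool. If the psock is no longer TX_ENABLED, the message is freed rather
than queued, and the caller subtracts the charge again so the counter
stays balanced. Likewise, sk_msg_recvmsg() and
__sk_psock_purge_ingress_msg() check msg->skb before uncharging, because
skb-backed messages were never charged this way in the first place.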