forked from mirrors/linux
		
	mptcp: properly account bulk freed memory
After commit 879526030c ("mptcp: protect the rx path with the msk socket spinlock") the rmem currently used by a given msk is really sk_rmem_alloc - rmem_released. The safety check in mptcp_data_ready() does not take the above into account; as a result legit incoming data is kept in the subflow receive queue with no reason, delaying or blocking MPTCP-level ack generation. This change addresses the issue introducing a new helper to fetch the rmem memory and using it as needed. Additionally add a MIB counter for the exceptional event described above - the peer is misbehaving. Finally, introduce the required annotation when rmem_released is updated. Fixes: 879526030c ("mptcp: protect the rx path with the msk socket spinlock") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/211 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									a7da441621
								
							
						
					
					
						commit
						ce599c5163
					
				
					 4 changed files with 18 additions and 6 deletions
				
			
		|  | @ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { | ||||||
| 	SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), | 	SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), | ||||||
| 	SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), | 	SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), | ||||||
| 	SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), | 	SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), | ||||||
|  | 	SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), | ||||||
| 	SNMP_MIB_SENTINEL | 	SNMP_MIB_SENTINEL | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -37,6 +37,7 @@ enum linux_mptcp_mib_field { | ||||||
| 	MPTCP_MIB_RMSUBFLOW,		/* Remove a subflow */ | 	MPTCP_MIB_RMSUBFLOW,		/* Remove a subflow */ | ||||||
| 	MPTCP_MIB_MPPRIOTX,		/* Transmit a MP_PRIO */ | 	MPTCP_MIB_MPPRIOTX,		/* Transmit a MP_PRIO */ | ||||||
| 	MPTCP_MIB_MPPRIORX,		/* Received a MP_PRIO */ | 	MPTCP_MIB_MPPRIORX,		/* Received a MP_PRIO */ | ||||||
|  | 	MPTCP_MIB_RCVPRUNED,		/* Incoming packet dropped due to memory limit */ | ||||||
| 	__MPTCP_MIB_MAX | 	__MPTCP_MIB_MAX | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) | ||||||
| 	bool cleanup, rx_empty; | 	bool cleanup, rx_empty; | ||||||
| 
 | 
 | ||||||
| 	cleanup = (space > 0) && (space >= (old_space << 1)); | 	cleanup = (space > 0) && (space >= (old_space << 1)); | ||||||
| 	rx_empty = !atomic_read(&sk->sk_rmem_alloc); | 	rx_empty = !__mptcp_rmem(sk); | ||||||
| 
 | 
 | ||||||
| 	mptcp_for_each_subflow(msk, subflow) { | 	mptcp_for_each_subflow(msk, subflow) { | ||||||
| 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | ||||||
|  | @ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) | ||||||
| 		sk_rbuf = ssk_rbuf; | 		sk_rbuf = ssk_rbuf; | ||||||
| 
 | 
 | ||||||
| 	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/ | 	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/ | ||||||
| 	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) | 	if (__mptcp_rmem(sk) > sk_rbuf) { | ||||||
|  | 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); | ||||||
| 		return; | 		return; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	/* Wake-up the reader only for in-sequence data */ | 	/* Wake-up the reader only for in-sequence data */ | ||||||
| 	mptcp_data_lock(sk); | 	mptcp_data_lock(sk); | ||||||
|  | @ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, | ||||||
| 		if (!(flags & MSG_PEEK)) { | 		if (!(flags & MSG_PEEK)) { | ||||||
| 			/* we will bulk release the skb memory later */ | 			/* we will bulk release the skb memory later */ | ||||||
| 			skb->destructor = NULL; | 			skb->destructor = NULL; | ||||||
| 			msk->rmem_released += skb->truesize; | 			WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); | ||||||
| 			__skb_unlink(skb, &msk->receive_queue); | 			__skb_unlink(skb, &msk->receive_queue); | ||||||
| 			__kfree_skb(skb); | 			__kfree_skb(skb); | ||||||
| 		} | 		} | ||||||
|  | @ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk) | ||||||
| 
 | 
 | ||||||
| 	atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); | 	atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); | ||||||
| 	sk_mem_uncharge(sk, msk->rmem_released); | 	sk_mem_uncharge(sk, msk->rmem_released); | ||||||
| 	msk->rmem_released = 0; | 	WRITE_ONCE(msk->rmem_released, 0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void __mptcp_splice_receive_queue(struct sock *sk) | static void __mptcp_splice_receive_queue(struct sock *sk) | ||||||
|  | @ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk) | ||||||
| 	msk->out_of_order_queue = RB_ROOT; | 	msk->out_of_order_queue = RB_ROOT; | ||||||
| 	msk->first_pending = NULL; | 	msk->first_pending = NULL; | ||||||
| 	msk->wmem_reserved = 0; | 	msk->wmem_reserved = 0; | ||||||
| 	msk->rmem_released = 0; | 	WRITE_ONCE(msk->rmem_released, 0); | ||||||
| 	msk->tx_pending_data = 0; | 	msk->tx_pending_data = 0; | ||||||
| 
 | 
 | ||||||
| 	msk->first = NULL; | 	msk->first = NULL; | ||||||
|  |  | ||||||
|  | @ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) | ||||||
| 	return (struct mptcp_sock *)sk; | 	return (struct mptcp_sock *)sk; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* the msk socket don't use the backlog, also account for the bulk
 | ||||||
|  |  * free memory | ||||||
|  |  */ | ||||||
|  | static inline int __mptcp_rmem(const struct sock *sk) | ||||||
|  | { | ||||||
|  | 	return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline int __mptcp_space(const struct sock *sk) | static inline int __mptcp_space(const struct sock *sk) | ||||||
| { | { | ||||||
| 	return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); | 	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) | static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Paolo Abeni
						Paolo Abeni