forked from mirrors/linux
		
	netem: fix skb_orphan_partial()
I should have known that lowering skb->truesize was dangerous :/
In case packets are not leaving the host via a standard Ethernet device,
but are looped back to local sockets, bad things can happen, as reported
by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 )
So instead of tweaking skb->truesize, let's change skb->destructor
and keep a reference on the owner socket via its sk_refcnt.
Fixes: f2f872f927 ("netem: Introduce skb_orphan_partial() helper")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Michael Madsen <mkm@nabto.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
This commit is contained in:
		
							parent
							
								
									4e3c60ed2f
								
							
						
					
					
						commit
						f6ba8d33cf
					
				
					 1 changed files with 8 additions and 12 deletions
				
			
		|  | @ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w); | |||
|  * delay queue. We want to allow the owner socket to send more | ||||
|  * packets, as if they were already TX completed by a typical driver. | ||||
|  * But we also want to keep skb->sk set because some packet schedulers | ||||
|  * rely on it (sch_fq for example). So we set skb->truesize to a small | ||||
|  * amount (1) and decrease sk_wmem_alloc accordingly. | ||||
|  * rely on it (sch_fq for example). | ||||
|  */ | ||||
| void skb_orphan_partial(struct sk_buff *skb) | ||||
| { | ||||
| 	/* If this skb is a TCP pure ACK or already went here,
 | ||||
| 	 * we have nothing to do. 2 is already a very small truesize. | ||||
| 	 */ | ||||
| 	if (skb->truesize <= 2) | ||||
| 	if (skb_is_tcp_pure_ack(skb)) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
 | ||||
| 	 * so we do not completely orphan skb, but transfer all | ||||
| 	 * accounted bytes but one, to avoid unexpected reorders. | ||||
| 	 */ | ||||
| 	if (skb->destructor == sock_wfree | ||||
| #ifdef CONFIG_INET | ||||
| 	    || skb->destructor == tcp_wfree | ||||
| #endif | ||||
| 		) { | ||||
| 		atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); | ||||
| 		skb->truesize = 1; | ||||
| 		struct sock *sk = skb->sk; | ||||
| 
 | ||||
| 		if (atomic_inc_not_zero(&sk->sk_refcnt)) { | ||||
| 			atomic_sub(skb->truesize, &sk->sk_wmem_alloc); | ||||
| 			skb->destructor = sock_efree; | ||||
| 		} | ||||
| 	} else { | ||||
| 		skb_orphan(skb); | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet