mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	net: avoid RCU for NOCACHE dst
There is no point in using RCU for a dst that we allocate for a very short time (used once). Change dst_release() to take DST_NOCACHE into account, and also change skb_dst_set_noref() to force a refcount increment for such a dst. This is a _huge_ gain, because we don't waste memory storing many thousands of dsts. Instead of queueing them to RCU, we can free them instantly. CPU caches can stay hot, re-using the same memory blocks to hold temporary dsts. Note: remove the unneeded smp_mb__before_atomic_dec() in dst_release(), since atomic_dec_return() implies a full memory barrier. Stress test, 160,000,000 UDP frames sent, IP route cache disabled (DDOS). Before: real 0m38.091s user 0m13.189s sys 7m53.018s After: real 0m29.946s user 0m12.157s sys 7m40.605s For reference, if the IP route cache was enabled: real 0m32.030s user 0m10.521s sys 8m15.243s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									e6484930d7
								
							
						
					
					
						commit
						27b75c95f1
					
				
					 3 changed files with 33 additions and 19 deletions
				
			
		| 
						 | 
					@ -460,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 | 
				
			||||||
	skb->_skb_refdst = (unsigned long)dst;
 | 
						skb->_skb_refdst = (unsigned long)dst;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
 | 
				
			||||||
 * skb_dst_set_noref - sets skb dst, without a reference
 | 
					 | 
				
			||||||
 * @skb: buffer
 | 
					 | 
				
			||||||
 * @dst: dst entry
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Sets skb dst, assuming a reference was not taken on dst
 | 
					 | 
				
			||||||
 * skb_dst_drop() should not dst_release() this dst
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 | 
					 | 
				
			||||||
	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * skb_dst_is_noref - Test if skb dst isnt refcounted
 | 
					 * skb_dst_is_noref - Test if skb dst isnt refcounted
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
 | 
				
			||||||
	if (dst) {
 | 
						if (dst) {
 | 
				
			||||||
		int newrefcnt;
 | 
							int newrefcnt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		smp_mb__before_atomic_dec();
 | 
					 | 
				
			||||||
		newrefcnt = atomic_dec_return(&dst->__refcnt);
 | 
							newrefcnt = atomic_dec_return(&dst->__refcnt);
 | 
				
			||||||
		WARN_ON(newrefcnt < 0);
 | 
							WARN_ON(newrefcnt < 0);
 | 
				
			||||||
 | 
							if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
 | 
				
			||||||
 | 
								dst = dst_destroy(dst);
 | 
				
			||||||
 | 
								if (dst)
 | 
				
			||||||
 | 
									__dst_free(dst);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(dst_release);
 | 
					EXPORT_SYMBOL(dst_release);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * skb_dst_set_noref - sets skb dst, without a reference
 | 
				
			||||||
 | 
					 * @skb: buffer
 | 
				
			||||||
 | 
					 * @dst: dst entry
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Sets skb dst, assuming a reference was not taken on dst
 | 
				
			||||||
 | 
					 * skb_dst_drop() should not dst_release() this dst
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 | 
				
			||||||
 | 
						/* If dst not in cache, we must take a reference, because
 | 
				
			||||||
 | 
						 * dst_release() will destroy dst as soon as its refcount becomes zero
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (unlikely(dst->flags & DST_NOCACHE)) {
 | 
				
			||||||
 | 
							dst_hold(dst);
 | 
				
			||||||
 | 
							skb_dst_set(skb, dst);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(skb_dst_set_noref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Dirty hack. We did it in 2.2 (in __dst_free),
 | 
					/* Dirty hack. We did it in 2.2 (in __dst_free),
 | 
				
			||||||
 * we have _very_ good reasons not to repeat
 | 
					 * we have _very_ good reasons not to repeat
 | 
				
			||||||
 * this mistake in 2.3, but we have no choice
 | 
					 * this mistake in 2.3, but we have no choice
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1105,9 +1105,9 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt,
 | 
				
			||||||
		 * Note that we do rt_free on this new route entry, so that
 | 
							 * Note that we do rt_free on this new route entry, so that
 | 
				
			||||||
		 * once its refcount hits zero, we are still able to reap it
 | 
							 * once its refcount hits zero, we are still able to reap it
 | 
				
			||||||
		 * (Thanks Alexey)
 | 
							 * (Thanks Alexey)
 | 
				
			||||||
		 * Note also the rt_free uses call_rcu.  We don't actually
 | 
							 * Note: To avoid expensive rcu stuff for this uncached dst,
 | 
				
			||||||
		 * need rcu protection here, this is just our path to get
 | 
							 * we set DST_NOCACHE so that dst_release() can free dst without
 | 
				
			||||||
		 * on the route gc list.
 | 
							 * waiting a grace period.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rt->dst.flags |= DST_NOCACHE;
 | 
							rt->dst.flags |= DST_NOCACHE;
 | 
				
			||||||
| 
						 | 
					@ -1117,12 +1117,11 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt,
 | 
				
			||||||
				if (net_ratelimit())
 | 
									if (net_ratelimit())
 | 
				
			||||||
					printk(KERN_WARNING
 | 
										printk(KERN_WARNING
 | 
				
			||||||
					    "Neighbour table failure & not caching routes.\n");
 | 
										    "Neighbour table failure & not caching routes.\n");
 | 
				
			||||||
				rt_drop(rt);
 | 
									ip_rt_put(rt);
 | 
				
			||||||
				return err;
 | 
									return err;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rt_free(rt);
 | 
					 | 
				
			||||||
		goto skip_hashing;
 | 
							goto skip_hashing;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue