net: rfs: hash function change

RFS is using two kinds of hash tables.

First one is controlled by /proc/sys/net/core/rps_sock_flow_entries = 2^N
and using the N low order bits of the l4 hash is good enough.

Then each RX queue has its own hash table, controlled by
/sys/class/net/eth1/queues/rx-$q/rps_flow_cnt = 2^X

Current hash function, using the X low order bits, is suboptimal,
because RSS is usually using Func(hash) = (hash % power_of_two);
For example, with 32 RX queues, 6 low order bits have no entropy
for a given queue.

Switch this hash function to hash_32(hash, log) to increase chances
to use all possible slots and reduce collisions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250321171309.634100-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
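To make the entropy argument concrete, here is a rough standalone userspace sketch (not part of the patch). It compares the old slot function, hash & mask, with the new hash_32(hash, log) for flows that RSS has already steered to the same RX queue, i.e. hashes whose low-order bits are identical. hash_32() is modeled on the multiplicative hash from include/linux/hash.h (multiply by GOLDEN_RATIO_32, keep the top bits); the queue count, table size and random sample hashes are made-up illustration values.

/*
 * Userspace sketch: once RSS has fixed the low-order bits of the hash,
 * "hash & mask" can only reach a small subset of the per-queue table,
 * while hash_32(hash, log) still spreads entries across all slots.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Modeled on include/linux/hash.h */
#define GOLDEN_RATIO_32 0x61C88647u

static uint32_t hash_32(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	unsigned int nr_queues = 32;		/* hypothetical RSS queue count */
	unsigned int log = 8;			/* hypothetical rps_flow_cnt = 256 */
	uint32_t mask = (1u << log) - 1;
	unsigned int used_old[256] = {0}, used_new[256] = {0};
	unsigned int slots_old = 0, slots_new = 0;

	srand(1);
	for (int i = 0; i < 1000; i++) {
		uint32_t hash = (uint32_t)rand();

		/* Force every sample onto queue 0: the bits RSS consumed
		 * are now constant and carry no entropy.
		 */
		hash -= hash % nr_queues;
		if (!used_old[hash & mask]++)
			slots_old++;
		if (!used_new[hash_32(hash, log)]++)
			slots_new++;
	}
	printf("old, hash & mask : %u distinct slots used\n", slots_old);
	printf("new, hash_32()   : %u distinct slots used\n", slots_new);
	return 0;
}

Running a sketch like this shows the masked variant clustering on a handful of slots per queue, while the multiplicative hash uses most of the table.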
commit f3483c8e1d
parent 1952e19c02

3 changed files with 12 additions and 7 deletions
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -39,7 +39,7 @@ struct rps_dev_flow {
  * The rps_dev_flow_table structure contains a table of flow mappings.
  */
 struct rps_dev_flow_table {
-	unsigned int		mask;
+	u8			log;
 	struct rcu_head		rcu;
 	struct rps_dev_flow	flows[];
 };
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4751,6 +4751,11 @@ EXPORT_SYMBOL(rps_needed);
 struct static_key_false rfs_needed __read_mostly;
 EXPORT_SYMBOL(rfs_needed);
 
+static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
+{
+	return hash_32(hash, flow_table->log);
+}
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -4777,7 +4782,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
 		if (!flow_table)
 			goto out;
-		flow_id = skb_get_hash(skb) & flow_table->mask;
+		flow_id = rfs_slot(skb_get_hash(skb), flow_table);
 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
 							rxq_index, flow_id);
 		if (rc < 0)
@@ -4856,7 +4861,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		/* OK, now we know there is a match,
 		 * we can look at the local (per receive queue) flow table
 		 */
-		rflow = &flow_table->flows[hash & flow_table->mask];
+		rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
 		tcpu = rflow->cpu;
 
 		/*
@@ -4923,13 +4928,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
-	if (flow_table && flow_id <= flow_table->mask) {
+	if (flow_table && flow_id < (1UL << flow_table->log)) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = READ_ONCE(rflow->cpu);
 		if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
 			   READ_ONCE(rflow->last_qtail)) <
-		     (int)(10 * flow_table->mask)))
+		     (int)(10 << flow_table->log)))
 			expire = false;
 	}
 	rcu_read_unlock();
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1056,7 +1056,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 	rcu_read_lock();
 	flow_table = rcu_dereference(queue->rps_flow_table);
 	if (flow_table)
-		val = (unsigned long)flow_table->mask + 1;
+		val = 1UL << flow_table->log;
 	rcu_read_unlock();
 
 	return sysfs_emit(buf, "%lu\n", val);
@@ -1109,7 +1109,7 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		if (!table)
 			return -ENOMEM;
 
-		table->mask = mask;
+		table->log = ilog2(mask) + 1;
 		for (count = 0; count <= mask; count++)
 			table->flows[count].cpu = RPS_NO_CPU;
 	} else {
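A note on the sysfs conversion above: assuming mask has the form 2^X - 1, as the commit message's rps_flow_cnt = 2^X implies, ilog2(mask) + 1 recovers the exponent X, so show_rps_dev_flow_table_cnt()'s 1UL << log reports the same value the old mask + 1 did. Below is a minimal userspace check of that round trip; the kernel's ilog2() is approximated here with a compiler builtin (an assumption for illustration, not the kernel code).

#include <stdio.h>

/* Userspace stand-in for the kernel's ilog2(): index of the highest set bit. */
static unsigned int ilog2_approx(unsigned long v)
{
	return (unsigned int)(8 * sizeof(v)) - 1 - (unsigned int)__builtin_clzl(v);
}

int main(void)
{
	/* rps_flow_cnt is 2^X, so mask = 2^X - 1.  Check that
	 * log = ilog2(mask) + 1 recovers X and 1UL << log the table size.
	 */
	for (unsigned int x = 1; x <= 16; x++) {
		unsigned long cnt = 1UL << x;
		unsigned long mask = cnt - 1;
		unsigned int log = ilog2_approx(mask) + 1;

		printf("cnt=%-6lu mask=%-6lu -> log=%-2u  1UL<<log=%lu\n",
		       cnt, mask, log, 1UL << log);
	}
	return 0;
}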