mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	nft_set_pipapo: Introduce AVX2-based lookup implementation
If the AVX2 instruction set is available, we can exploit the repetitive
characteristic of this algorithm to provide a fast, vectorised
version by using 256-bit wide AVX2 operations for bucket loads and
bitwise intersections.
In most cases, this implementation consistently outperforms rbtree
set instances, even though those are configured to use only a single
ranged data type, picked from the ones used for performance
measurements by the nft_concat_range.sh kselftest.
That script injects packets directly on the ingress device path
with pktgen. Averaged over five runs on a single AMD Epyc 7402
thread (3.35GHz, 768 KiB L1D$, 12 MiB L2$), it reports the figures below.
CONFIG_RETPOLINE was not set here.
Note that this is not a fair comparison against hash and rbtree set
types: non-ranged entries (used to have a reference for hash types)
would be matched faster than this, and matching on a single field
only (which is the case for rbtree) is also significantly faster.
However, it's not possible at the moment to choose this set type
for non-ranged entries, and the current implementation also needs
a few minor adjustments in order to match on fewer than two fields.
 ---------------.-----------------------------------.------------.
 AMD Epyc 7402  |          baselines, Mpps          | this patch |
  1 thread      |___________________________________|____________|
  3.35GHz       |        |        |        |        |            |
  768KiB L1D$   | netdev |  hash  | rbtree |        |            |
 ---------------|  hook  |   no   | single |        |   pipapo   |
 type   entries |  drop  | ranges | field  | pipapo |    AVX2    |
 ---------------|--------|--------|--------|--------|------------|
 net,port       |        |        |        |        |            |
          1000  |   19.0 |   10.4 |    3.8 |    4.0 | 7.5   +87% |
 ---------------|--------|--------|--------|--------|------------|
 port,net       |        |        |        |        |            |
           100  |   18.8 |   10.3 |    5.8 |    6.3 | 8.1   +29% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port      |        |        |        |        |            |
          1000  |   16.4 |    7.6 |    1.8 |    2.1 | 4.8  +128% |
 ---------------|--------|--------|--------|--------|------------|
 port,proto     |        |        |        |        |            |
         30000  |   19.6 |   11.6 |    3.9 |    0.5 | 2.6  +420% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port,mac  |        |        |        |        |            |
            10  |   16.5 |    5.4 |    4.3 |    3.4 | 4.7   +38% |
 ---------------|--------|--------|--------|--------|------------|
 net6,port,mac, |        |        |        |        |            |
 proto    1000  |   16.5 |    5.7 |    1.9 |    1.4 | 3.6  +157% |
 ---------------|--------|--------|--------|--------|------------|
 net,mac        |        |        |        |        |            |
          1000  |   19.0 |    8.4 |    3.9 |    2.5 | 6.4  +156% |
 ---------------'--------'--------'--------'--------'------------'
A similar strategy could be easily reused to implement specialised
versions for other SIMD instruction sets, and I plan to post at least a NEON
version at a later time.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
			
			
This commit is contained in:
		
							parent
							
								
									8683f4b995
								
							
						
					
					
						commit
						7400b06396
					
				
					 6 changed files with 1270 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -75,6 +75,7 @@ extern const struct nft_set_type nft_set_hash_fast_type;
 | 
			
		|||
extern const struct nft_set_type nft_set_rbtree_type;
 | 
			
		||||
extern const struct nft_set_type nft_set_bitmap_type;
 | 
			
		||||
extern const struct nft_set_type nft_set_pipapo_type;
 | 
			
		||||
extern const struct nft_set_type nft_set_pipapo_avx2_type;
 | 
			
		||||
 | 
			
		||||
struct nft_expr;
 | 
			
		||||
struct nft_regs;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -82,6 +82,12 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
 | 
			
		|||
		  nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
 | 
			
		||||
		  nft_set_pipapo.o
 | 
			
		||||
 | 
			
		||||
ifdef CONFIG_X86_64
 | 
			
		||||
ifneq (,$(findstring -DCONFIG_AS_AVX2=1,$(KBUILD_CFLAGS)))
 | 
			
		||||
nf_tables-objs += nft_set_pipapo_avx2.o
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
 | 
			
		||||
obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 | 
			
		||||
obj-$(CONFIG_NFT_CONNLIMIT)	+= nft_connlimit.o
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3272,6 +3272,9 @@ static const struct nft_set_type *nft_set_types[] = {
 | 
			
		|||
	&nft_set_rhash_type,
 | 
			
		||||
	&nft_set_bitmap_type,
 | 
			
		||||
	&nft_set_rbtree_type,
 | 
			
		||||
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
 | 
			
		||||
	&nft_set_pipapo_avx2_type,
 | 
			
		||||
#endif
 | 
			
		||||
	&nft_set_pipapo_type,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -339,6 +339,7 @@
 | 
			
		|||
#include <linux/bitmap.h>
 | 
			
		||||
#include <linux/bitops.h>
 | 
			
		||||
 | 
			
		||||
#include "nft_set_pipapo_avx2.h"
 | 
			
		||||
#include "nft_set_pipapo.h"
 | 
			
		||||
 | 
			
		||||
/* Current working bitmap index, toggled between field matches */
 | 
			
		||||
| 
						 | 
				
			
			@ -2174,3 +2175,26 @@ const struct nft_set_type nft_set_pipapo_type = {
 | 
			
		|||
		.elemsize	= offsetof(struct nft_pipapo_elem, ext),
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
 | 
			
		||||
const struct nft_set_type nft_set_pipapo_avx2_type = {
 | 
			
		||||
	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
 | 
			
		||||
			  NFT_SET_TIMEOUT,
 | 
			
		||||
	.ops		= {
 | 
			
		||||
		.lookup		= nft_pipapo_avx2_lookup,
 | 
			
		||||
		.insert		= nft_pipapo_insert,
 | 
			
		||||
		.activate	= nft_pipapo_activate,
 | 
			
		||||
		.deactivate	= nft_pipapo_deactivate,
 | 
			
		||||
		.flush		= nft_pipapo_flush,
 | 
			
		||||
		.remove		= nft_pipapo_remove,
 | 
			
		||||
		.walk		= nft_pipapo_walk,
 | 
			
		||||
		.get		= nft_pipapo_get,
 | 
			
		||||
		.privsize	= nft_pipapo_privsize,
 | 
			
		||||
		.estimate	= nft_pipapo_avx2_estimate,
 | 
			
		||||
		.init		= nft_pipapo_init,
 | 
			
		||||
		.destroy	= nft_pipapo_destroy,
 | 
			
		||||
		.gc_init	= nft_pipapo_gc_init,
 | 
			
		||||
		.elemsize	= offsetof(struct nft_pipapo_elem, ext),
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										1222
									
								
								net/netfilter/nft_set_pipapo_avx2.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1222
									
								
								net/netfilter/nft_set_pipapo_avx2.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										14
									
								
								net/netfilter/nft_set_pipapo_avx2.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								net/netfilter/nft_set_pipapo_avx2.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,14 @@
 | 
			
		|||
/* SPDX-License-Identifier: GPL-2.0-only */
 | 
			
		||||
#ifndef _NFT_SET_PIPAPO_AVX2_H
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_AS_AVX2
 | 
			
		||||
#include <asm/fpu/xstate.h>
 | 
			
		||||
#define NFT_PIPAPO_ALIGN	(XSAVE_YMM_SIZE / BITS_PER_BYTE)
 | 
			
		||||
 | 
			
		||||
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 | 
			
		||||
			    const u32 *key, const struct nft_set_ext **ext);
 | 
			
		||||
bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
 | 
			
		||||
			      struct nft_set_estimate *est);
 | 
			
		||||
#endif /* CONFIG_AS_AVX2 */
 | 
			
		||||
 | 
			
		||||
#endif /* _NFT_SET_PIPAPO_AVX2_H */
 | 
			
		||||
		Loading…
	
		Reference in a new issue