mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	samples/bpf: sample application and documentation for AF_XDP sockets
This is a sample application for AF_XDP sockets. The application
supports three different modes of operation: rxdrop, txonly and l2fwd.
To show-case a simple round-robin load-balancing between a set of
sockets in an xskmap, set the RR_LB compile time define option to 1 in
"xdpsock.h".
v2: The entries variable was calculated twice in {umem,xq}_nb_avail.
Co-authored-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
			
			
This commit is contained in:
		
							parent
							
								
									af75d9e02d
								
							
						
					
					
						commit
						b4b8faa1de
					
				
					 6 changed files with 1317 additions and 0 deletions
				
			
		
							
								
								
									
										297
									
								
								Documentation/networking/af_xdp.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								Documentation/networking/af_xdp.rst
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,297 @@
 | 
			
		|||
.. SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
 | 
			
		||||
======
 | 
			
		||||
AF_XDP
 | 
			
		||||
======
 | 
			
		||||
 | 
			
		||||
Overview
 | 
			
		||||
========
 | 
			
		||||
 | 
			
		||||
AF_XDP is an address family that is optimized for high performance
 | 
			
		||||
packet processing.
 | 
			
		||||
 | 
			
		||||
This document assumes that the reader is familiar with BPF and XDP. If
 | 
			
		||||
not, the Cilium project has an excellent reference guide at
 | 
			
		||||
http://cilium.readthedocs.io/en/doc-1.0/bpf/.
 | 
			
		||||
 | 
			
		||||
Using the XDP_REDIRECT action from an XDP program, the program can
 | 
			
		||||
redirect ingress frames to other XDP enabled netdevs, using the
 | 
			
		||||
bpf_redirect_map() function. AF_XDP sockets enable the possibility for
 | 
			
		||||
XDP programs to redirect frames to a memory buffer in a user-space
 | 
			
		||||
application.
 | 
			
		||||
 | 
			
		||||
An AF_XDP socket (XSK) is created with the normal socket()
 | 
			
		||||
syscall. Associated with each XSK are two rings: the RX ring and the
 | 
			
		||||
TX ring. A socket can receive packets on the RX ring and it can send
 | 
			
		||||
packets on the TX ring. These rings are registered and sized with the
 | 
			
		||||
setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory
 | 
			
		||||
to have at least one of these rings for each socket. An RX or TX
 | 
			
		||||
descriptor ring points to a data buffer in a memory area called a
 | 
			
		||||
UMEM. RX and TX can share the same UMEM so that a packet does not have
 | 
			
		||||
to be copied between RX and TX. Moreover, if a packet needs to be kept
 | 
			
		||||
for a while due to a possible retransmit, the descriptor that points
 | 
			
		||||
to that packet can be changed to point to another and reused right
 | 
			
		||||
away. This again avoids copying data.
 | 
			
		||||
 | 
			
		||||
The UMEM consists of a number of equally sized frames and each frame
 | 
			
		||||
has a unique frame id. A descriptor in one of the rings references a
 | 
			
		||||
frame by referencing its frame id. The user space allocates memory for
 | 
			
		||||
this UMEM using whatever means it feels is most appropriate (malloc,
 | 
			
		||||
mmap, huge pages, etc). This memory area is then registered with the
 | 
			
		||||
kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two
 | 
			
		||||
rings: the FILL ring and the COMPLETION ring. The fill ring is used by
 | 
			
		||||
the application to send down frame ids for the kernel to fill in with
 | 
			
		||||
RX packet data. References to these frames will then appear in the RX
 | 
			
		||||
ring once each packet has been received. The completion ring, on the
 | 
			
		||||
other hand, contains frame ids that the kernel has transmitted
 | 
			
		||||
completely and can now be used again by user space, for either TX or
 | 
			
		||||
RX. Thus, the frame ids appearing in the completion ring are ids that
 | 
			
		||||
were previously transmitted using the TX ring. In summary, the RX and
 | 
			
		||||
FILL rings are used for the RX path and the TX and COMPLETION rings
 | 
			
		||||
are used for the TX path.
 | 
			
		||||
 | 
			
		||||
The socket is then finally bound with a bind() call to a device and a
 | 
			
		||||
specific queue id on that device, and it is not until bind is
 | 
			
		||||
completed that traffic starts to flow.
 | 
			
		||||
 | 
			
		||||
The UMEM can be shared between processes, if desired. If a process
 | 
			
		||||
wants to do this, it simply skips the registration of the UMEM and its
 | 
			
		||||
corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind
 | 
			
		||||
call and submits the XSK of the process it would like to share UMEM
 | 
			
		||||
with as well as its own newly created XSK socket. The new process will
 | 
			
		||||
then receive frame id references in its own RX ring that point to this
 | 
			
		||||
shared UMEM. Note that since the ring structures are single-consumer /
 | 
			
		||||
single-producer (for performance reasons), the new process has to
 | 
			
		||||
create its own socket with associated RX and TX rings, since it cannot
 | 
			
		||||
share this with the other process. This is also the reason that there
 | 
			
		||||
is only one set of FILL and COMPLETION rings per UMEM. It is the
 | 
			
		||||
responsibility of a single process to handle the UMEM.
 | 
			
		||||
 | 
			
		||||
How are packets then distributed from an XDP program to the XSKs? There
 | 
			
		||||
is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The
 | 
			
		||||
user-space application can place an XSK at an arbitrary place in this
 | 
			
		||||
map. The XDP program can then redirect a packet to a specific index in
 | 
			
		||||
this map and at this point XDP validates that the XSK in that map was
 | 
			
		||||
indeed bound to that device and ring number. If not, the packet is
 | 
			
		||||
dropped. If the map is empty at that index, the packet is also
 | 
			
		||||
dropped. This also means that it is currently mandatory to have an XDP
 | 
			
		||||
program loaded (and one XSK in the XSKMAP) to be able to get any
 | 
			
		||||
traffic to user space through the XSK.
 | 
			
		||||
 | 
			
		||||
AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If the
 | 
			
		||||
driver does not have support for XDP, or XDP_SKB is explicitly chosen
 | 
			
		||||
when loading the XDP program, XDP_SKB mode is employed that uses SKBs
 | 
			
		||||
together with the generic XDP support and copies out the data to user
 | 
			
		||||
space. This is a fallback mode that works for any network device. On the other
 | 
			
		||||
hand, if the driver has support for XDP, it will be used by the AF_XDP
 | 
			
		||||
code to provide better performance, but there is still a copy of the
 | 
			
		||||
data into user space.
 | 
			
		||||
 | 
			
		||||
Concepts
 | 
			
		||||
========
 | 
			
		||||
 | 
			
		||||
In order to use an AF_XDP socket, a number of associated objects need
 | 
			
		||||
to be setup.
 | 
			
		||||
 | 
			
		||||
Jonathan Corbet has also written an excellent article on LWN,
 | 
			
		||||
"Accelerating networking with AF_XDP". It can be found at
 | 
			
		||||
https://lwn.net/Articles/750845/.
 | 
			
		||||
 | 
			
		||||
UMEM
 | 
			
		||||
----
 | 
			
		||||
 | 
			
		||||
UMEM is a region of virtual contiguous memory, divided into
 | 
			
		||||
equal-sized frames. A UMEM is associated with a netdev and a specific
 | 
			
		||||
queue id of that netdev. It is created and configured (frame size,
 | 
			
		||||
frame headroom, start address and size) by using the XDP_UMEM_REG
 | 
			
		||||
setsockopt system call. A UMEM is bound to a netdev and queue id, via
 | 
			
		||||
the bind() system call.
 | 
			
		||||
 | 
			
		||||
An AF_XDP socket is linked to a single UMEM, but one UMEM can have
 | 
			
		||||
multiple AF_XDP sockets. To share a UMEM created via one socket A,
 | 
			
		||||
the next socket B can do this by setting the XDP_SHARED_UMEM flag in
 | 
			
		||||
struct sockaddr_xdp member sxdp_flags, and passing the file descriptor
 | 
			
		||||
of A to struct sockaddr_xdp member sxdp_shared_umem_fd.
 | 
			
		||||
 | 
			
		||||
The UMEM has two single-producer/single-consumer rings, that are used
 | 
			
		||||
to transfer ownership of UMEM frames between the kernel and the
 | 
			
		||||
user-space application.
 | 
			
		||||
 | 
			
		||||
Rings
 | 
			
		||||
-----
 | 
			
		||||
 | 
			
		||||
There are four different kinds of rings: Fill, Completion, RX and
 | 
			
		||||
TX. All rings are single-producer/single-consumer, so the user-space
 | 
			
		||||
application needs explicit synchronization if multiple
 | 
			
		||||
processes/threads are reading/writing to them.
 | 
			
		||||
 | 
			
		||||
The UMEM uses two rings: Fill and Completion. Each socket associated
 | 
			
		||||
with the UMEM must have an RX queue, TX queue or both. Say, that there
 | 
			
		||||
is a setup with four sockets (all doing TX and RX). Then there will be
 | 
			
		||||
one Fill ring, one Completion ring, four TX rings and four RX rings.
 | 
			
		||||
 | 
			
		||||
The rings are head(producer)/tail(consumer) based rings. A producer
 | 
			
		||||
writes the data ring at the index pointed out by struct xdp_ring
 | 
			
		||||
producer member, and then increases the producer index. A consumer reads
 | 
			
		||||
the data ring at the index pointed out by struct xdp_ring consumer
 | 
			
		||||
member, and then increases the consumer index.
 | 
			
		||||
 | 
			
		||||
The rings are configured and created via the _RING setsockopt system
 | 
			
		||||
calls and mmapped to user-space using the appropriate offset to mmap()
 | 
			
		||||
(XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and
 | 
			
		||||
XDP_UMEM_PGOFF_COMPLETION_RING).
 | 
			
		||||
 | 
			
		||||
The size of each ring needs to be a power of two.
 | 
			
		||||
 | 
			
		||||
UMEM Fill Ring
 | 
			
		||||
~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
The Fill ring is used to transfer ownership of UMEM frames from
 | 
			
		||||
user-space to kernel-space. The UMEM indices are passed in the
 | 
			
		||||
ring. As an example, if the UMEM is 64k and each frame is 4k, then the
 | 
			
		||||
UMEM has 16 frames and can pass indices between 0 and 15.
 | 
			
		||||
 | 
			
		||||
Frames passed to the kernel are used for the ingress path (RX rings).
 | 
			
		||||
 | 
			
		||||
The user application produces UMEM indices to this ring.
 | 
			
		||||
 | 
			
		||||
UMEM Completion Ring
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
The Completion Ring is used to transfer ownership of UMEM frames from
 | 
			
		||||
kernel-space to user-space. Just like the Fill ring, UMEM indices are
 | 
			
		||||
used.
 | 
			
		||||
 | 
			
		||||
Frames passed from the kernel to user-space are frames that have been
 | 
			
		||||
sent (TX ring) and can be used by user-space again.
 | 
			
		||||
 | 
			
		||||
The user application consumes UMEM indices from this ring.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
RX Ring
 | 
			
		||||
~~~~~~~
 | 
			
		||||
 | 
			
		||||
The RX ring is the receiving side of a socket. Each entry in the ring
 | 
			
		||||
is a struct xdp_desc descriptor. The descriptor contains the UMEM index
 | 
			
		||||
(idx), the length of the data (len), and the offset into the frame
 | 
			
		||||
(offset).
 | 
			
		||||
 | 
			
		||||
If no frames have been passed to kernel via the Fill ring, no
 | 
			
		||||
descriptors will (or can) appear on the RX ring.
 | 
			
		||||
 | 
			
		||||
The user application consumes struct xdp_desc descriptors from this
 | 
			
		||||
ring.
 | 
			
		||||
 | 
			
		||||
TX Ring
 | 
			
		||||
~~~~~~~
 | 
			
		||||
 | 
			
		||||
The TX ring is used to send frames. The struct xdp_desc descriptor is
 | 
			
		||||
filled (index, length and offset) and passed into the ring.
 | 
			
		||||
 | 
			
		||||
To start the transfer a sendmsg() system call is required. This might
 | 
			
		||||
be relaxed in the future.
 | 
			
		||||
 | 
			
		||||
The user application produces struct xdp_desc descriptors to this
 | 
			
		||||
ring.
 | 
			
		||||
 | 
			
		||||
XSKMAP / BPF_MAP_TYPE_XSKMAP
 | 
			
		||||
----------------------------
 | 
			
		||||
 | 
			
		||||
On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that
 | 
			
		||||
is used in conjunction with bpf_redirect_map() to pass the ingress
 | 
			
		||||
frame to a socket.
 | 
			
		||||
 | 
			
		||||
The user application inserts the socket into the map, via the bpf()
 | 
			
		||||
system call.
 | 
			
		||||
 | 
			
		||||
Note that if an XDP program tries to redirect to a socket that does
 | 
			
		||||
not match the queue configuration and netdev, the frame will be
 | 
			
		||||
dropped. E.g. an AF_XDP socket is bound to netdev eth0 and
 | 
			
		||||
queue 17. Only the XDP program executing for eth0 and queue 17 will
 | 
			
		||||
successfully pass data to the socket. Please refer to the sample
 | 
			
		||||
application (samples/bpf/) for an example.
 | 
			
		||||
 | 
			
		||||
Usage
 | 
			
		||||
=====
 | 
			
		||||
 | 
			
		||||
In order to use AF_XDP sockets there are two parts needed. The
 | 
			
		||||
user-space application and the XDP program. For a complete setup and
 | 
			
		||||
usage example, please refer to the sample application. The user-space
 | 
			
		||||
side is xdpsock_user.c and the XDP side xdpsock_kern.c.
 | 
			
		||||
 | 
			
		||||
Naive ring dequeue and enqueue could look like this::
 | 
			
		||||
 | 
			
		||||
    // typedef struct xdp_rxtx_ring RING;
 | 
			
		||||
    // typedef struct xdp_umem_ring RING;
 | 
			
		||||
 | 
			
		||||
    // typedef struct xdp_desc RING_TYPE;
 | 
			
		||||
    // typedef __u32 RING_TYPE;
 | 
			
		||||
 | 
			
		||||
    int dequeue_one(RING *ring, RING_TYPE *item)
 | 
			
		||||
    {
 | 
			
		||||
        __u32 entries = ring->ptrs.producer - ring->ptrs.consumer;
 | 
			
		||||
 | 
			
		||||
        if (entries == 0)
 | 
			
		||||
            return -1;
 | 
			
		||||
 | 
			
		||||
        // read-barrier!
 | 
			
		||||
 | 
			
		||||
        *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)];
 | 
			
		||||
        ring->ptrs.consumer++;
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int enqueue_one(RING *ring, const RING_TYPE *item)
 | 
			
		||||
    {
 | 
			
		||||
        __u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer);
 | 
			
		||||
 | 
			
		||||
        if (free_entries == 0)
 | 
			
		||||
            return -1;
 | 
			
		||||
 | 
			
		||||
        ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item;
 | 
			
		||||
 | 
			
		||||
        // write-barrier!
 | 
			
		||||
 | 
			
		||||
        ring->ptrs.producer++;
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
For a more optimized version, please refer to the sample application.
 | 
			
		||||
 | 
			
		||||
Sample application
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
There is an xdpsock benchmarking/test application included that
 | 
			
		||||
demonstrates how to use AF_XDP sockets with both private and shared
 | 
			
		||||
UMEMs. Say that you would like your UDP traffic from port 4242 to end
 | 
			
		||||
up in queue 16, that we will enable AF_XDP on. Here, we use ethtool
 | 
			
		||||
for this::
 | 
			
		||||
 | 
			
		||||
      ethtool -N p3p2 rx-flow-hash udp4 fn
 | 
			
		||||
      ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \
 | 
			
		||||
          action 16
 | 
			
		||||
 | 
			
		||||
Running the rxdrop benchmark in XDP_DRV mode can then be done
 | 
			
		||||
using::
 | 
			
		||||
 | 
			
		||||
      samples/bpf/xdpsock -i p3p2 -q 16 -r -N
 | 
			
		||||
 | 
			
		||||
For XDP_SKB mode, use the switch "-S" instead of "-N" and all options
 | 
			
		||||
can be displayed with "-h", as usual.
 | 
			
		||||
 | 
			
		||||
Credits
 | 
			
		||||
=======
 | 
			
		||||
 | 
			
		||||
- Björn Töpel (AF_XDP core)
 | 
			
		||||
- Magnus Karlsson (AF_XDP core)
 | 
			
		||||
- Alexander Duyck
 | 
			
		||||
- Alexei Starovoitov
 | 
			
		||||
- Daniel Borkmann
 | 
			
		||||
- Jesper Dangaard Brouer
 | 
			
		||||
- John Fastabend
 | 
			
		||||
- Jonathan Corbet (LWN coverage)
 | 
			
		||||
- Michael S. Tsirkin
 | 
			
		||||
- Qi Z Zhang
 | 
			
		||||
- Willem de Bruijn
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -6,6 +6,7 @@ Contents:
 | 
			
		|||
.. toctree::
 | 
			
		||||
   :maxdepth: 2
 | 
			
		||||
 | 
			
		||||
   af_xdp
 | 
			
		||||
   batman-adv
 | 
			
		||||
   can
 | 
			
		||||
   dpaa2/index
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,7 @@ hostprogs-y += xdp_rxq_info
 | 
			
		|||
hostprogs-y += syscall_tp
 | 
			
		||||
hostprogs-y += cpustat
 | 
			
		||||
hostprogs-y += xdp_adjust_tail
 | 
			
		||||
hostprogs-y += xdpsock
 | 
			
		||||
 | 
			
		||||
# Libbpf dependencies
 | 
			
		||||
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 | 
			
		||||
| 
						 | 
				
			
			@ -98,6 +99,7 @@ xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 | 
			
		|||
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 | 
			
		||||
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 | 
			
		||||
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
 | 
			
		||||
xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o
 | 
			
		||||
 | 
			
		||||
# Tell kbuild to always build the programs
 | 
			
		||||
always := $(hostprogs-y)
 | 
			
		||||
| 
						 | 
				
			
			@ -151,6 +153,7 @@ always += xdp2skb_meta_kern.o
 | 
			
		|||
always += syscall_tp_kern.o
 | 
			
		||||
always += cpustat_kern.o
 | 
			
		||||
always += xdp_adjust_tail_kern.o
 | 
			
		||||
always += xdpsock_kern.o
 | 
			
		||||
 | 
			
		||||
HOSTCFLAGS += -I$(objtree)/usr/include
 | 
			
		||||
HOSTCFLAGS += -I$(srctree)/tools/lib/
 | 
			
		||||
| 
						 | 
				
			
			@ -197,6 +200,7 @@ HOSTLOADLIBES_xdp_rxq_info += -lelf
 | 
			
		|||
HOSTLOADLIBES_syscall_tp += -lelf
 | 
			
		||||
HOSTLOADLIBES_cpustat += -lelf
 | 
			
		||||
HOSTLOADLIBES_xdp_adjust_tail += -lelf
 | 
			
		||||
HOSTLOADLIBES_xdpsock += -lelf -pthread
 | 
			
		||||
 | 
			
		||||
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 | 
			
		||||
#  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										11
									
								
								samples/bpf/xdpsock.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								samples/bpf/xdpsock.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
/* SPDX-License-Identifier: GPL-2.0 */
 | 
			
		||||
#ifndef XDPSOCK_H_
 | 
			
		||||
#define XDPSOCK_H_
 | 
			
		||||
 | 
			
		||||
/* Power-of-2 number of sockets */
 | 
			
		||||
#define MAX_SOCKS 4
 | 
			
		||||
 | 
			
		||||
/* Round-robin receive */
 | 
			
		||||
#define RR_LB 0
 | 
			
		||||
 | 
			
		||||
#endif /* XDPSOCK_H_ */
 | 
			
		||||
							
								
								
									
										56
									
								
								samples/bpf/xdpsock_kern.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								samples/bpf/xdpsock_kern.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,56 @@
 | 
			
		|||
// SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
#define KBUILD_MODNAME "foo"
 | 
			
		||||
#include <uapi/linux/bpf.h>
 | 
			
		||||
#include "bpf_helpers.h"
 | 
			
		||||
 | 
			
		||||
#include "xdpsock.h"
 | 
			
		||||
 | 
			
		||||
/*
 * Single-entry array map. xdp_sock_prog() reads element 0 and compares it
 * with the RX queue index of the incoming frame.
 */
struct bpf_map_def SEC("maps") qidconf_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1,
};
 | 
			
		||||
 | 
			
		||||
/*
 * XSKMAP that xdp_sock_prog() redirects frames into.
 *
 * The map is sized with MAX_SOCKS (from xdpsock.h) rather than a literal so
 * that it always matches the index range the program can generate: in
 * round-robin mode the index is masked with (MAX_SOCKS - 1).
 */
struct bpf_map_def SEC("maps") xsks_map = {
	.type = BPF_MAP_TYPE_XSKMAP,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = MAX_SOCKS,
};
 | 
			
		||||
 | 
			
		||||
/*
 * Single-entry per-CPU array. When RR_LB is enabled, xdp_sock_prog() keeps
 * its round-robin socket counter in element 0.
 */
struct bpf_map_def SEC("maps") rr_map = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(unsigned int),
	.max_entries	= 1,
};
 | 
			
		||||
 | 
			
		||||
/*
 * XDP entry point: steer frames from one RX queue into the XSKMAP.
 *
 * Element 0 of qidconf_map holds the queue id to match. Frames arriving on
 * any other queue are passed on with XDP_PASS. Matching frames are
 * redirected to an entry of xsks_map: entry 0 by default, or a round-robin
 * entry when RR_LB is set in xdpsock.h.
 *
 * Returns XDP_ABORTED if a required map element cannot be looked up,
 * XDP_PASS for non-matching queues, otherwise the result of
 * bpf_redirect_map().
 */
SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	int *qidconf, key = 0, idx;
#if RR_LB
	/* Only needed for round-robin; declared here to avoid an unused local. */
	unsigned int *rr;
#endif

	qidconf = bpf_map_lookup_elem(&qidconf_map, &key);
	if (!qidconf)
		return XDP_ABORTED;

	if (*qidconf != ctx->rx_queue_index)
		return XDP_PASS;

#if RR_LB /* NB! RR_LB is configured in xdpsock.h */
	rr = bpf_map_lookup_elem(&rr_map, &key);
	if (!rr)
		return XDP_ABORTED;

	/* MAX_SOCKS is a power of two, so the mask wraps the counter. */
	*rr = (*rr + 1) & (MAX_SOCKS - 1);
	idx = *rr;
#else
	idx = 0;
#endif

	return bpf_redirect_map(&xsks_map, idx, 0);
}
 | 
			
		||||
 | 
			
		||||
/* License string emitted into the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
 | 
			
		||||
							
								
								
									
										948
									
								
								samples/bpf/xdpsock_user.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										948
									
								
								samples/bpf/xdpsock_user.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,948 @@
 | 
			
		|||
// SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
/* Copyright(c) 2017 - 2018 Intel Corporation.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
 * under the terms and conditions of the GNU General Public License,
 | 
			
		||||
 * version 2, as published by the Free Software Foundation.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope it will be useful, but WITHOUT
 | 
			
		||||
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | 
			
		||||
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 | 
			
		||||
 * more details.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <assert.h>
 | 
			
		||||
#include <errno.h>
 | 
			
		||||
#include <getopt.h>
 | 
			
		||||
#include <libgen.h>
 | 
			
		||||
#include <linux/bpf.h>
 | 
			
		||||
#include <linux/if_link.h>
 | 
			
		||||
#include <linux/if_xdp.h>
 | 
			
		||||
#include <linux/if_ether.h>
 | 
			
		||||
#include <net/if.h>
 | 
			
		||||
#include <signal.h>
 | 
			
		||||
#include <stdbool.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <net/ethernet.h>
 | 
			
		||||
#include <sys/resource.h>
 | 
			
		||||
#include <sys/socket.h>
 | 
			
		||||
#include <sys/mman.h>
 | 
			
		||||
#include <time.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <pthread.h>
 | 
			
		||||
#include <locale.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
#include <poll.h>
 | 
			
		||||
 | 
			
		||||
#include "bpf_load.h"
 | 
			
		||||
#include "bpf_util.h"
 | 
			
		||||
#include "libbpf.h"
 | 
			
		||||
 | 
			
		||||
#include "xdpsock.h"
 | 
			
		||||
 | 
			
		||||
#ifndef SOL_XDP
 | 
			
		||||
#define SOL_XDP 283
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef AF_XDP
 | 
			
		||||
#define AF_XDP 44
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef PF_XDP
 | 
			
		||||
#define PF_XDP AF_XDP
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define NUM_FRAMES 131072
 | 
			
		||||
#define FRAME_HEADROOM 0
 | 
			
		||||
#define FRAME_SIZE 2048
 | 
			
		||||
#define NUM_DESCS 1024
 | 
			
		||||
#define BATCH_SIZE 16
 | 
			
		||||
 | 
			
		||||
#define FQ_NUM_DESCS 1024
 | 
			
		||||
#define CQ_NUM_DESCS 1024
 | 
			
		||||
 | 
			
		||||
#define DEBUG_HEXDUMP 0
 | 
			
		||||
 | 
			
		||||
/* Short alias for __u32, used by the ring bookkeeping code in this file. */
typedef __u32 u32;
 | 
			
		||||
 | 
			
		||||
/* NOTE(review): presumably the timestamp of the previous stats dump; its users are outside this excerpt - confirm. */
static unsigned long prev_time;
 | 
			
		||||
 | 
			
		||||
/* Benchmark modes; the numeric values are 0, 1 and 2 in declaration order. */
enum benchmark_type {
	BENCH_RXDROP,	/* 0 */
	BENCH_TXONLY,	/* 1 */
	BENCH_L2FWD,	/* 2 */
};
 | 
			
		||||
 | 
			
		||||
static enum benchmark_type opt_bench = BENCH_RXDROP;
 | 
			
		||||
static u32 opt_xdp_flags;
 | 
			
		||||
static const char *opt_if = "";
 | 
			
		||||
static int opt_ifindex;
 | 
			
		||||
static int opt_queue;
 | 
			
		||||
static int opt_poll;
 | 
			
		||||
static int opt_shared_packet_buffer;
 | 
			
		||||
static int opt_interval = 1;
 | 
			
		||||
 | 
			
		||||
struct xdp_umem_uqueue {
 | 
			
		||||
	u32 cached_prod;
 | 
			
		||||
	u32 cached_cons;
 | 
			
		||||
	u32 mask;
 | 
			
		||||
	u32 size;
 | 
			
		||||
	struct xdp_umem_ring *ring;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct xdp_umem {
 | 
			
		||||
	char (*frames)[FRAME_SIZE];
 | 
			
		||||
	struct xdp_umem_uqueue fq;
 | 
			
		||||
	struct xdp_umem_uqueue cq;
 | 
			
		||||
	int fd;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct xdp_uqueue {
 | 
			
		||||
	u32 cached_prod;
 | 
			
		||||
	u32 cached_cons;
 | 
			
		||||
	u32 mask;
 | 
			
		||||
	u32 size;
 | 
			
		||||
	struct xdp_rxtx_ring *ring;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct xdpsock {
 | 
			
		||||
	struct xdp_uqueue rx;
 | 
			
		||||
	struct xdp_uqueue tx;
 | 
			
		||||
	int sfd;
 | 
			
		||||
	struct xdp_umem *umem;
 | 
			
		||||
	u32 outstanding_tx;
 | 
			
		||||
	unsigned long rx_npkts;
 | 
			
		||||
	unsigned long tx_npkts;
 | 
			
		||||
	unsigned long prev_rx_npkts;
 | 
			
		||||
	unsigned long prev_tx_npkts;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * MAX_SOCKS is normally provided by "xdpsock.h", which this file includes
 * and which the XDP program also uses. The guarded fallback keeps the header
 * as the single source of truth instead of repeating the value here.
 */
#ifndef MAX_SOCKS
#define MAX_SOCKS 4
#endif

/* Number of valid entries in xsks[]. */
static int num_socks;

/* Table of sockets, at most MAX_SOCKS entries. */
struct xdpsock *xsks[MAX_SOCKS];
 | 
			
		||||
 | 
			
		||||
/*
 * Return the current CLOCK_MONOTONIC time in nanoseconds.
 *
 * The return value of clock_gettime() is not checked.
 */
static unsigned long get_nsecs(void)
{
	struct timespec now;
	unsigned long nsecs;

	clock_gettime(CLOCK_MONOTONIC, &now);

	nsecs = now.tv_sec * 1000000000UL;
	nsecs += now.tv_nsec;
	return nsecs;
}
 | 
			
		||||
 | 
			
		||||
/* Forward declaration: lassert() below calls dump_stats() before exiting. */
static void dump_stats(void);
 | 
			
		||||
 | 
			
		||||
/*
 * lassert(expr) - assertion that also reports errno.
 *
 * If expr is false, print the file, function, line, the failed expression
 * and the current errno (number and strerror() text) to stderr, call
 * dump_stats(), and terminate with EXIT_FAILURE. If expr is true, do
 * nothing.
 */
#define lassert(expr)							\
	do {								\
		if (expr)						\
			break;						\
		fprintf(stderr, "%s:%s:%i: Assertion failed: "		\
			#expr ": errno: %d/\"%s\"\n",			\
			__FILE__, __func__, __LINE__,			\
			errno, strerror(errno));			\
		dump_stats();						\
		exit(EXIT_FAILURE);					\
	} while (0)
 | 
			
		||||
 | 
			
		||||
/* Compiler-only barrier: stops the compiler from reordering memory accesses. */
#define barrier() __asm__ __volatile__("": : :"memory")

/*
 * Barriers used when exchanging ring indices and descriptors with the other
 * side of a ring.
 *
 * A compiler barrier alone orders nothing at the CPU level on weakly ordered
 * architectures, so real fences are used:
 *  - u_smp_rmb(): acquire fence, placed after reading a shared index and
 *    before reading the ring entries it covers.
 *  - u_smp_wmb(): release fence, placed after accessing ring entries and
 *    before publishing the updated index.
 * On strongly ordered CPUs such as x86 these typically cost no extra
 * instruction.
 */
#define u_smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
#define u_smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)

/* Branch prediction hints; both evaluate to the truth value of x (0 or 1). */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
 | 
			
		||||
 | 
			
		||||
/*
 * Canned 60-byte frame (plus the implicit trailing NUL of the literal).
 * The bytes decode as an Ethernet/IPv4/UDP packet; they are grouped by
 * header below, but the content is one contiguous string.
 */
static const char pkt_data[] =
	/* Ethernet: destination MAC, source MAC, EtherType 0x0800 */
	"\x3c\xfd\xfe\x9e\x7f\x71" "\xec\xb1\xd7\x98\x3a\xc0" "\x08\x00"
	/* IPv4 header: total length 0x002e, TTL 0x40, protocol 0x11 (UDP) */
	"\x45\x00\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97"
	/* IPv4 source and destination addresses */
	"\x05\x08\x07\x08" "\xc8\x14\x1e\x04"
	/* UDP header: ports 0x1092/0x1092 (4242), length 0x001a, checksum */
	"\x10\x92\x10\x92\x00\x1a\x6d\xa3"
	/* 18 bytes of payload */
	"\x34\x33\x1f\x69\x40\x6b\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9"
	"\xbe\xaa";
 | 
			
		||||
 | 
			
		||||
/*
 * Return the number of free entries in UMEM ring @q.
 *
 * The cached consumer index is used first. Only if that view has fewer than
 * @nb free entries is the consumer index re-read from the ring and the count
 * recomputed.
 */
static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
{
	u32 room = q->size - (q->cached_prod - q->cached_cons);

	if (room < nb) {
		/* Cached view is too small: refresh the local tail pointer. */
		q->cached_cons = q->ring->ptrs.consumer;
		room = q->size - (q->cached_prod - q->cached_cons);
	}

	return room;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Return the number of free entries in RX/TX ring @q.
 *
 * Here cached_cons is kept advanced by the ring size, so the free count is
 * simply cached_cons - cached_prod. If the cached view has fewer than
 * @ndescs free entries, the consumer index is re-read from the ring.
 */
static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
{
	u32 room = q->cached_cons - q->cached_prod;

	if (room < ndescs) {
		/* Refresh the local tail pointer (offset by the ring size). */
		q->cached_cons = q->ring->ptrs.consumer + q->size;
		room = q->cached_cons - q->cached_prod;
	}

	return room;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Return how many entries can be consumed from UMEM ring @q, capped at @nb.
 *
 * The producer index is re-read from the ring only when the cached view
 * shows the ring as empty.
 */
static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
{
	u32 pending = q->cached_prod - q->cached_cons;

	if (!pending) {
		q->cached_prod = q->ring->ptrs.producer;
		pending = q->cached_prod - q->cached_cons;
	}

	if (pending > nb)
		return nb;

	return pending;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Return how many descriptors can be consumed from RX/TX ring @q, capped at
 * @ndescs.
 *
 * The producer index is re-read from the ring only when the cached view
 * shows the ring as empty.
 */
static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
{
	u32 pending = q->cached_prod - q->cached_cons;

	if (pending != 0)
		return pending > ndescs ? ndescs : pending;

	q->cached_prod = q->ring->ptrs.producer;
	pending = q->cached_prod - q->cached_cons;

	return pending > ndescs ? ndescs : pending;
}
 | 
			
		||||
 | 
			
		||||
static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
 | 
			
		||||
					 struct xdp_desc *d,
 | 
			
		||||
					 size_t nb)
 | 
			
		||||
{
 | 
			
		||||
	u32 i;
 | 
			
		||||
 | 
			
		||||
	if (umem_nb_free(fq, nb) < nb)
 | 
			
		||||
		return -ENOSPC;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < nb; i++) {
 | 
			
		||||
		u32 idx = fq->cached_prod++ & fq->mask;
 | 
			
		||||
 | 
			
		||||
		fq->ring->desc[idx] = d[i].idx;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	u_smp_wmb();
 | 
			
		||||
 | 
			
		||||
	fq->ring->ptrs.producer = fq->cached_prod;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Post @nb frame indices from array @d to UMEM ring @fq.
 *
 * The operation is all-or-nothing.
 *
 * Returns 0 on success, or -ENOSPC if fewer than @nb entries are free (in
 * which case nothing is written).
 */
static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
				      size_t nb)
{
	u32 n = 0;

	if (umem_nb_free(fq, nb) < nb)
		return -ENOSPC;

	while (n < nb) {
		u32 slot = fq->cached_prod++ & fq->mask;

		fq->ring->desc[slot] = d[n++];
	}

	/* Entries must be written before the new producer index is published. */
	u_smp_wmb();
	fq->ring->ptrs.producer = fq->cached_prod;

	return 0;
}
 | 
			
		||||
 | 
			
		||||
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
 | 
			
		||||
					       u32 *d, size_t nb)
 | 
			
		||||
{
 | 
			
		||||
	u32 idx, i, entries = umem_nb_avail(cq, nb);
 | 
			
		||||
 | 
			
		||||
	u_smp_rmb();
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < entries; i++) {
 | 
			
		||||
		idx = cq->cached_cons++ & cq->mask;
 | 
			
		||||
		d[i] = cq->ring->desc[idx];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (entries > 0) {
 | 
			
		||||
		u_smp_wmb();
 | 
			
		||||
 | 
			
		||||
		cq->ring->ptrs.consumer = cq->cached_cons;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return entries;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Return a pointer to byte @off inside umem frame @idx of @xsk.
 * Asserts that @idx is below NUM_FRAMES; @off is not checked.
 */
static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off)
{
	char *frame;

	lassert(idx < NUM_FRAMES);

	frame = xsk->umem->frames[idx];
	return frame + off;
}
 | 
			
		||||
 | 
			
		||||
static inline int xq_enq(struct xdp_uqueue *uq,
 | 
			
		||||
			 const struct xdp_desc *descs,
 | 
			
		||||
			 unsigned int ndescs)
 | 
			
		||||
{
 | 
			
		||||
	struct xdp_rxtx_ring *r = uq->ring;
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
 | 
			
		||||
	if (xq_nb_free(uq, ndescs) < ndescs)
 | 
			
		||||
		return -ENOSPC;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < ndescs; i++) {
 | 
			
		||||
		u32 idx = uq->cached_prod++ & uq->mask;
 | 
			
		||||
 | 
			
		||||
		r->desc[idx].idx = descs[i].idx;
 | 
			
		||||
		r->desc[idx].len = descs[i].len;
 | 
			
		||||
		r->desc[idx].offset = descs[i].offset;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	u_smp_wmb();
 | 
			
		||||
 | 
			
		||||
	r->ptrs.producer = uq->cached_prod;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
 | 
			
		||||
				 __u32 idx, unsigned int ndescs)
 | 
			
		||||
{
 | 
			
		||||
	struct xdp_rxtx_ring *q = uq->ring;
 | 
			
		||||
	unsigned int i;
 | 
			
		||||
 | 
			
		||||
	if (xq_nb_free(uq, ndescs) < ndescs)
 | 
			
		||||
		return -ENOSPC;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < ndescs; i++) {
 | 
			
		||||
		u32 idx = uq->cached_prod++ & uq->mask;
 | 
			
		||||
 | 
			
		||||
		q->desc[idx].idx	= idx + i;
 | 
			
		||||
		q->desc[idx].len	= sizeof(pkt_data) - 1;
 | 
			
		||||
		q->desc[idx].offset	= 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	u_smp_wmb();
 | 
			
		||||
 | 
			
		||||
	q->ptrs.producer = uq->cached_prod;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline int xq_deq(struct xdp_uqueue *uq,
 | 
			
		||||
			 struct xdp_desc *descs,
 | 
			
		||||
			 int ndescs)
 | 
			
		||||
{
 | 
			
		||||
	struct xdp_rxtx_ring *r = uq->ring;
 | 
			
		||||
	unsigned int idx;
 | 
			
		||||
	int i, entries;
 | 
			
		||||
 | 
			
		||||
	entries = xq_nb_avail(uq, ndescs);
 | 
			
		||||
 | 
			
		||||
	u_smp_rmb();
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < entries; i++) {
 | 
			
		||||
		idx = uq->cached_cons++ & uq->mask;
 | 
			
		||||
		descs[i] = r->desc[idx];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (entries > 0) {
 | 
			
		||||
		u_smp_wmb();
 | 
			
		||||
 | 
			
		||||
		r->ptrs.consumer = uq->cached_cons;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return entries;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Exchange the source and destination MAC addresses of the Ethernet
 * header that starts at @data. Nothing beyond the two address fields is
 * touched.
 */
static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr tmp;

	tmp = *src_addr;
	*src_addr = *dst_addr;
	*dst_addr = tmp;
}
 | 
			
		||||
 | 
			
		||||
#if DEBUG_HEXDUMP
/*
 * Debug helper: print @length bytes starting at @pkt as rows of 32 hex
 * bytes, each row starting with @prefix and followed by a printable
 * rendering of the same bytes. A short last row is padded with "__".
 */
static void hex_dump(void *pkt, size_t length, const char *prefix)
{
	const size_t per_line = 32;
	const unsigned char *cursor = (unsigned char *)pkt;
	const unsigned char *row = cursor;
	int count = 0;

	printf("length = %zu\n", length);
	printf("%s | ", prefix);
	while (length-- > 0) {
		printf("%02X ", *cursor++);
		++count;

		/* Keep going until the row is full or the input is used up. */
		if ((count % per_line) != 0 && length != 0)
			continue;

		if (length == 0) {
			/* Pad out the final row. */
			while (count++ % per_line)
				printf("__ ");
		}
		printf(" | ");	/* right close */
		while (row < cursor) {
			unsigned char c = *row++;

			/* Bytes below 33 and 255 are shown as '.' (0x2E). */
			printf("%c", (c < 33 || c == 255) ? 0x2E : c);
		}
		printf("\n");
		if (length > 0)
			printf("%s | ", prefix);
	}
	printf("\n");
}
#endif
 | 
			
		||||
 | 
			
		||||
static size_t gen_eth_frame(char *frame)
 | 
			
		||||
{
 | 
			
		||||
	memcpy(frame, pkt_data, sizeof(pkt_data) - 1);
 | 
			
		||||
	return sizeof(pkt_data) - 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct xdp_umem *xdp_umem_configure(int sfd)
 | 
			
		||||
{
 | 
			
		||||
	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
 | 
			
		||||
	struct xdp_umem_reg mr;
 | 
			
		||||
	struct xdp_umem *umem;
 | 
			
		||||
	void *bufs;
 | 
			
		||||
 | 
			
		||||
	umem = calloc(1, sizeof(*umem));
 | 
			
		||||
	lassert(umem);
 | 
			
		||||
 | 
			
		||||
	lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
 | 
			
		||||
			       NUM_FRAMES * FRAME_SIZE) == 0);
 | 
			
		||||
 | 
			
		||||
	mr.addr = (__u64)bufs;
 | 
			
		||||
	mr.len = NUM_FRAMES * FRAME_SIZE;
 | 
			
		||||
	mr.frame_size = FRAME_SIZE;
 | 
			
		||||
	mr.frame_headroom = FRAME_HEADROOM;
 | 
			
		||||
 | 
			
		||||
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
 | 
			
		||||
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
 | 
			
		||||
			   sizeof(int)) == 0);
 | 
			
		||||
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
 | 
			
		||||
			   sizeof(int)) == 0);
 | 
			
		||||
 | 
			
		||||
	umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
 | 
			
		||||
			     FQ_NUM_DESCS * sizeof(u32),
 | 
			
		||||
			     PROT_READ | PROT_WRITE,
 | 
			
		||||
			     MAP_SHARED | MAP_POPULATE, sfd,
 | 
			
		||||
			     XDP_UMEM_PGOFF_FILL_RING);
 | 
			
		||||
	lassert(umem->fq.ring != MAP_FAILED);
 | 
			
		||||
 | 
			
		||||
	umem->fq.mask = FQ_NUM_DESCS - 1;
 | 
			
		||||
	umem->fq.size = FQ_NUM_DESCS;
 | 
			
		||||
 | 
			
		||||
	umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
 | 
			
		||||
			     CQ_NUM_DESCS * sizeof(u32),
 | 
			
		||||
			     PROT_READ | PROT_WRITE,
 | 
			
		||||
			     MAP_SHARED | MAP_POPULATE, sfd,
 | 
			
		||||
			     XDP_UMEM_PGOFF_COMPLETION_RING);
 | 
			
		||||
	lassert(umem->cq.ring != MAP_FAILED);
 | 
			
		||||
 | 
			
		||||
	umem->cq.mask = CQ_NUM_DESCS - 1;
 | 
			
		||||
	umem->cq.size = CQ_NUM_DESCS;
 | 
			
		||||
 | 
			
		||||
	umem->frames = (char (*)[FRAME_SIZE])bufs;
 | 
			
		||||
	umem->fd = sfd;
 | 
			
		||||
 | 
			
		||||
	if (opt_bench == BENCH_TXONLY) {
 | 
			
		||||
		int i;
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < NUM_FRAMES; i++)
 | 
			
		||||
			(void)gen_eth_frame(&umem->frames[i][0]);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return umem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
 | 
			
		||||
{
 | 
			
		||||
	struct sockaddr_xdp sxdp = {};
 | 
			
		||||
	int sfd, ndescs = NUM_DESCS;
 | 
			
		||||
	struct xdpsock *xsk;
 | 
			
		||||
	bool shared = true;
 | 
			
		||||
	u32 i;
 | 
			
		||||
 | 
			
		||||
	sfd = socket(PF_XDP, SOCK_RAW, 0);
 | 
			
		||||
	lassert(sfd >= 0);
 | 
			
		||||
 | 
			
		||||
	xsk = calloc(1, sizeof(*xsk));
 | 
			
		||||
	lassert(xsk);
 | 
			
		||||
 | 
			
		||||
	xsk->sfd = sfd;
 | 
			
		||||
	xsk->outstanding_tx = 0;
 | 
			
		||||
 | 
			
		||||
	if (!umem) {
 | 
			
		||||
		shared = false;
 | 
			
		||||
		xsk->umem = xdp_umem_configure(sfd);
 | 
			
		||||
	} else {
 | 
			
		||||
		xsk->umem = umem;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING,
 | 
			
		||||
			   &ndescs, sizeof(int)) == 0);
 | 
			
		||||
	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
 | 
			
		||||
			   &ndescs, sizeof(int)) == 0);
 | 
			
		||||
 | 
			
		||||
	/* Rx */
 | 
			
		||||
	xsk->rx.ring = mmap(NULL,
 | 
			
		||||
			    sizeof(struct xdp_ring) +
 | 
			
		||||
			    NUM_DESCS * sizeof(struct xdp_desc),
 | 
			
		||||
			    PROT_READ | PROT_WRITE,
 | 
			
		||||
			    MAP_SHARED | MAP_POPULATE, sfd,
 | 
			
		||||
			    XDP_PGOFF_RX_RING);
 | 
			
		||||
	lassert(xsk->rx.ring != MAP_FAILED);
 | 
			
		||||
 | 
			
		||||
	if (!shared) {
 | 
			
		||||
		for (i = 0; i < NUM_DESCS / 2; i++)
 | 
			
		||||
			lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
 | 
			
		||||
				== 0);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Tx */
 | 
			
		||||
	xsk->tx.ring = mmap(NULL,
 | 
			
		||||
			 sizeof(struct xdp_ring) +
 | 
			
		||||
			 NUM_DESCS * sizeof(struct xdp_desc),
 | 
			
		||||
			 PROT_READ | PROT_WRITE,
 | 
			
		||||
			 MAP_SHARED | MAP_POPULATE, sfd,
 | 
			
		||||
			 XDP_PGOFF_TX_RING);
 | 
			
		||||
	lassert(xsk->tx.ring != MAP_FAILED);
 | 
			
		||||
 | 
			
		||||
	xsk->rx.mask = NUM_DESCS - 1;
 | 
			
		||||
	xsk->rx.size = NUM_DESCS;
 | 
			
		||||
 | 
			
		||||
	xsk->tx.mask = NUM_DESCS - 1;
 | 
			
		||||
	xsk->tx.size = NUM_DESCS;
 | 
			
		||||
 | 
			
		||||
	sxdp.sxdp_family = PF_XDP;
 | 
			
		||||
	sxdp.sxdp_ifindex = opt_ifindex;
 | 
			
		||||
	sxdp.sxdp_queue_id = opt_queue;
 | 
			
		||||
	if (shared) {
 | 
			
		||||
		sxdp.sxdp_flags = XDP_SHARED_UMEM;
 | 
			
		||||
		sxdp.sxdp_shared_umem_fd = umem->fd;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0);
 | 
			
		||||
 | 
			
		||||
	return xsk;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void print_benchmark(bool running)
 | 
			
		||||
{
 | 
			
		||||
	const char *bench_str = "INVALID";
 | 
			
		||||
 | 
			
		||||
	if (opt_bench == BENCH_RXDROP)
 | 
			
		||||
		bench_str = "rxdrop";
 | 
			
		||||
	else if (opt_bench == BENCH_TXONLY)
 | 
			
		||||
		bench_str = "txonly";
 | 
			
		||||
	else if (opt_bench == BENCH_L2FWD)
 | 
			
		||||
		bench_str = "l2fwd";
 | 
			
		||||
 | 
			
		||||
	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
 | 
			
		||||
	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
 | 
			
		||||
		printf("xdp-skb ");
 | 
			
		||||
	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
 | 
			
		||||
		printf("xdp-drv ");
 | 
			
		||||
	else
 | 
			
		||||
		printf("	");
 | 
			
		||||
 | 
			
		||||
	if (opt_poll)
 | 
			
		||||
		printf("poll() ");
 | 
			
		||||
 | 
			
		||||
	if (running) {
 | 
			
		||||
		printf("running...");
 | 
			
		||||
		fflush(stdout);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void dump_stats(void)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long now = get_nsecs();
 | 
			
		||||
	long dt = now - prev_time;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	prev_time = now;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < num_socks; i++) {
 | 
			
		||||
		char *fmt = "%-15s %'-11.0f %'-11lu\n";
 | 
			
		||||
		double rx_pps, tx_pps;
 | 
			
		||||
 | 
			
		||||
		rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) *
 | 
			
		||||
			 1000000000. / dt;
 | 
			
		||||
		tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) *
 | 
			
		||||
			 1000000000. / dt;
 | 
			
		||||
 | 
			
		||||
		printf("\n sock%d@", i);
 | 
			
		||||
		print_benchmark(false);
 | 
			
		||||
		printf("\n");
 | 
			
		||||
 | 
			
		||||
		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
 | 
			
		||||
		       dt / 1000000000.);
 | 
			
		||||
		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
 | 
			
		||||
		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
 | 
			
		||||
 | 
			
		||||
		xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts;
 | 
			
		||||
		xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void *poller(void *arg)
 | 
			
		||||
{
 | 
			
		||||
	(void)arg;
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		sleep(opt_interval);
 | 
			
		||||
		dump_stats();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void int_exit(int sig)
 | 
			
		||||
{
 | 
			
		||||
	(void)sig;
 | 
			
		||||
	dump_stats();
 | 
			
		||||
	bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
 | 
			
		||||
	exit(EXIT_SUCCESS);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Long command-line options for getopt_long(). Each entry maps to the
 * short option character in its last field; the all-zero entry ends the
 * table.
 */
static struct option long_options[] = {
	{ "rxdrop",		no_argument,		NULL, 'r' },
	{ "txonly",		no_argument,		NULL, 't' },
	{ "l2fwd",		no_argument,		NULL, 'l' },
	{ "interface",		required_argument,	NULL, 'i' },
	{ "queue",		required_argument,	NULL, 'q' },
	{ "poll",		no_argument,		NULL, 'p' },
	{ "shared-buffer",	no_argument,		NULL, 's' },
	{ "xdp-skb",		no_argument,		NULL, 'S' },
	{ "xdp-native",		no_argument,		NULL, 'N' },
	{ "interval",		required_argument,	NULL, 'n' },
	{ NULL,			0,			NULL, 0 }
};
 | 
			
		||||
 | 
			
		||||
/*
 * Print the command-line help for @prog to stderr and terminate the
 * process with EXIT_FAILURE. Does not return.
 *
 * -S and -N are flag options (no_argument in long_options), so they are
 * listed without a "=n" suffix.
 */
static void usage(const char *prog)
{
	const char *str =
		"  Usage: %s [OPTIONS]\n"
		"  Options:\n"
		"  -r, --rxdrop		Discard all incoming packets (default)\n"
		"  -t, --txonly		Only send packets\n"
		"  -l, --l2fwd		MAC swap L2 forwarding\n"
		"  -i, --interface=n	Run on interface n\n"
		"  -q, --queue=n	Use queue n (default 0)\n"
		"  -p, --poll		Use poll syscall\n"
		"  -s, --shared-buffer	Use shared packet buffer\n"
		"  -S, --xdp-skb		Use XDP skb-mode\n"
		"  -N, --xdp-native	Enforce XDP native mode\n"
		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
		"\n";
	fprintf(stderr, str, prog);
	exit(EXIT_FAILURE);
}
 | 
			
		||||
 | 
			
		||||
static void parse_command_line(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
	int option_index, c;
 | 
			
		||||
 | 
			
		||||
	opterr = 0;
 | 
			
		||||
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options,
 | 
			
		||||
				&option_index);
 | 
			
		||||
		if (c == -1)
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		switch (c) {
 | 
			
		||||
		case 'r':
 | 
			
		||||
			opt_bench = BENCH_RXDROP;
 | 
			
		||||
			break;
 | 
			
		||||
		case 't':
 | 
			
		||||
			opt_bench = BENCH_TXONLY;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'l':
 | 
			
		||||
			opt_bench = BENCH_L2FWD;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'i':
 | 
			
		||||
			opt_if = optarg;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'q':
 | 
			
		||||
			opt_queue = atoi(optarg);
 | 
			
		||||
			break;
 | 
			
		||||
		case 's':
 | 
			
		||||
			opt_shared_packet_buffer = 1;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'p':
 | 
			
		||||
			opt_poll = 1;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'S':
 | 
			
		||||
			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'N':
 | 
			
		||||
			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
 | 
			
		||||
			break;
 | 
			
		||||
		case 'n':
 | 
			
		||||
			opt_interval = atoi(optarg);
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
			usage(basename(argv[0]));
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	opt_ifindex = if_nametoindex(opt_if);
 | 
			
		||||
	if (!opt_ifindex) {
 | 
			
		||||
		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
 | 
			
		||||
			opt_if);
 | 
			
		||||
		usage(basename(argv[0]));
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Issue an empty, non-blocking sendto() on @fd. ENOBUFS and EAGAIN are
 * tolerated; any other failure trips lassert().
 */
static void kick_tx(int fd)
{
	int ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);

	if (ret >= 0)
		return;

	/* Transient conditions: the caller simply tries again later. */
	if (errno == ENOBUFS || errno == EAGAIN)
		return;

	lassert(0);
}
 | 
			
		||||
 | 
			
		||||
static inline void complete_tx_l2fwd(struct xdpsock *xsk)
 | 
			
		||||
{
 | 
			
		||||
	u32 descs[BATCH_SIZE];
 | 
			
		||||
	unsigned int rcvd;
 | 
			
		||||
	size_t ndescs;
 | 
			
		||||
 | 
			
		||||
	if (!xsk->outstanding_tx)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	kick_tx(xsk->sfd);
 | 
			
		||||
	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
 | 
			
		||||
		 xsk->outstanding_tx;
 | 
			
		||||
 | 
			
		||||
	/* re-add completed Tx buffers */
 | 
			
		||||
	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs);
 | 
			
		||||
	if (rcvd > 0) {
 | 
			
		||||
		umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd);
 | 
			
		||||
		xsk->outstanding_tx -= rcvd;
 | 
			
		||||
		xsk->tx_npkts += rcvd;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void complete_tx_only(struct xdpsock *xsk)
 | 
			
		||||
{
 | 
			
		||||
	u32 descs[BATCH_SIZE];
 | 
			
		||||
	unsigned int rcvd;
 | 
			
		||||
 | 
			
		||||
	if (!xsk->outstanding_tx)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	kick_tx(xsk->sfd);
 | 
			
		||||
 | 
			
		||||
	rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE);
 | 
			
		||||
	if (rcvd > 0) {
 | 
			
		||||
		xsk->outstanding_tx -= rcvd;
 | 
			
		||||
		xsk->tx_npkts += rcvd;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rx_drop(struct xdpsock *xsk)
 | 
			
		||||
{
 | 
			
		||||
	struct xdp_desc descs[BATCH_SIZE];
 | 
			
		||||
	unsigned int rcvd, i;
 | 
			
		||||
 | 
			
		||||
	rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
 | 
			
		||||
	if (!rcvd)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < rcvd; i++) {
 | 
			
		||||
		u32 idx = descs[i].idx;
 | 
			
		||||
 | 
			
		||||
		lassert(idx < NUM_FRAMES);
 | 
			
		||||
#if DEBUG_HEXDUMP
 | 
			
		||||
		char *pkt;
 | 
			
		||||
		char buf[32];
 | 
			
		||||
 | 
			
		||||
		pkt = xq_get_data(xsk, idx, descs[i].offset);
 | 
			
		||||
		sprintf(buf, "idx=%d", idx);
 | 
			
		||||
		hex_dump(pkt, descs[i].len, buf);
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	xsk->rx_npkts += rcvd;
 | 
			
		||||
 | 
			
		||||
	umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void rx_drop_all(void)
 | 
			
		||||
{
 | 
			
		||||
	struct pollfd fds[MAX_SOCKS + 1];
 | 
			
		||||
	int i, ret, timeout, nfds = 1;
 | 
			
		||||
 | 
			
		||||
	memset(fds, 0, sizeof(fds));
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < num_socks; i++) {
 | 
			
		||||
		fds[i].fd = xsks[i]->sfd;
 | 
			
		||||
		fds[i].events = POLLIN;
 | 
			
		||||
		timeout = 1000; /* 1sn */
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		if (opt_poll) {
 | 
			
		||||
			ret = poll(fds, nfds, timeout);
 | 
			
		||||
			if (ret <= 0)
 | 
			
		||||
				continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < num_socks; i++)
 | 
			
		||||
			rx_drop(xsks[i]);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void tx_only(struct xdpsock *xsk)
 | 
			
		||||
{
 | 
			
		||||
	int timeout, ret, nfds = 1;
 | 
			
		||||
	struct pollfd fds[nfds + 1];
 | 
			
		||||
	unsigned int idx = 0;
 | 
			
		||||
 | 
			
		||||
	memset(fds, 0, sizeof(fds));
 | 
			
		||||
	fds[0].fd = xsk->sfd;
 | 
			
		||||
	fds[0].events = POLLOUT;
 | 
			
		||||
	timeout = 1000; /* 1sn */
 | 
			
		||||
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		if (opt_poll) {
 | 
			
		||||
			ret = poll(fds, nfds, timeout);
 | 
			
		||||
			if (ret <= 0)
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			if (fds[0].fd != xsk->sfd ||
 | 
			
		||||
			    !(fds[0].revents & POLLOUT))
 | 
			
		||||
				continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) {
 | 
			
		||||
			lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0);
 | 
			
		||||
 | 
			
		||||
			xsk->outstanding_tx += BATCH_SIZE;
 | 
			
		||||
			idx += BATCH_SIZE;
 | 
			
		||||
			idx %= NUM_FRAMES;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		complete_tx_only(xsk);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void l2fwd(struct xdpsock *xsk)
 | 
			
		||||
{
 | 
			
		||||
	for (;;) {
 | 
			
		||||
		struct xdp_desc descs[BATCH_SIZE];
 | 
			
		||||
		unsigned int rcvd, i;
 | 
			
		||||
		int ret;
 | 
			
		||||
 | 
			
		||||
		for (;;) {
 | 
			
		||||
			complete_tx_l2fwd(xsk);
 | 
			
		||||
 | 
			
		||||
			rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE);
 | 
			
		||||
			if (rcvd > 0)
 | 
			
		||||
				break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < rcvd; i++) {
 | 
			
		||||
			char *pkt = xq_get_data(xsk, descs[i].idx,
 | 
			
		||||
						descs[i].offset);
 | 
			
		||||
 | 
			
		||||
			swap_mac_addresses(pkt);
 | 
			
		||||
#if DEBUG_HEXDUMP
 | 
			
		||||
			char buf[32];
 | 
			
		||||
			u32 idx = descs[i].idx;
 | 
			
		||||
 | 
			
		||||
			sprintf(buf, "idx=%d", idx);
 | 
			
		||||
			hex_dump(pkt, descs[i].len, buf);
 | 
			
		||||
#endif
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		xsk->rx_npkts += rcvd;
 | 
			
		||||
 | 
			
		||||
		ret = xq_enq(&xsk->tx, descs, rcvd);
 | 
			
		||||
		lassert(ret == 0);
 | 
			
		||||
		xsk->outstanding_tx += rcvd;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 | 
			
		||||
	char xdp_filename[256];
 | 
			
		||||
	int i, ret, key = 0;
 | 
			
		||||
	pthread_t pt;
 | 
			
		||||
 | 
			
		||||
	parse_command_line(argc, argv);
 | 
			
		||||
 | 
			
		||||
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 | 
			
		||||
		fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
 | 
			
		||||
			strerror(errno));
 | 
			
		||||
		exit(EXIT_FAILURE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
 | 
			
		||||
 | 
			
		||||
	if (load_bpf_file(xdp_filename)) {
 | 
			
		||||
		fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
 | 
			
		||||
		exit(EXIT_FAILURE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!prog_fd[0]) {
 | 
			
		||||
		fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
 | 
			
		||||
			strerror(errno));
 | 
			
		||||
		exit(EXIT_FAILURE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
 | 
			
		||||
		fprintf(stderr, "ERROR: link set xdp fd failed\n");
 | 
			
		||||
		exit(EXIT_FAILURE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
 | 
			
		||||
	if (ret) {
 | 
			
		||||
		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
 | 
			
		||||
		exit(EXIT_FAILURE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Create sockets... */
 | 
			
		||||
	xsks[num_socks++] = xsk_configure(NULL);
 | 
			
		||||
 | 
			
		||||
#if RR_LB
 | 
			
		||||
	for (i = 0; i < MAX_SOCKS - 1; i++)
 | 
			
		||||
		xsks[num_socks++] = xsk_configure(xsks[0]->umem);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	/* ...and insert them into the map. */
 | 
			
		||||
	for (i = 0; i < num_socks; i++) {
 | 
			
		||||
		key = i;
 | 
			
		||||
		ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
 | 
			
		||||
		if (ret) {
 | 
			
		||||
			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
 | 
			
		||||
			exit(EXIT_FAILURE);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	signal(SIGINT, int_exit);
 | 
			
		||||
	signal(SIGTERM, int_exit);
 | 
			
		||||
	signal(SIGABRT, int_exit);
 | 
			
		||||
 | 
			
		||||
	setlocale(LC_ALL, "");
 | 
			
		||||
 | 
			
		||||
	ret = pthread_create(&pt, NULL, poller, NULL);
 | 
			
		||||
	lassert(ret == 0);
 | 
			
		||||
 | 
			
		||||
	prev_time = get_nsecs();
 | 
			
		||||
 | 
			
		||||
	if (opt_bench == BENCH_RXDROP)
 | 
			
		||||
		rx_drop_all();
 | 
			
		||||
	else if (opt_bench == BENCH_TXONLY)
 | 
			
		||||
		tx_only(xsks[0]);
 | 
			
		||||
	else
 | 
			
		||||
		l2fwd(xsks[0]);
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in a new issue