mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	af_packet: TX_RING support for TPACKET_V3
Although TPACKET_V3 Rx has some benefits over TPACKET_V2 Rx, *_v3 does not currently have TX_RING support. As a result, an application that wants the best perf for Tx and Rx (e.g. to handle request/response transactions) ends up needing 2 sockets, one with *_v2 for Tx and another with *_v3 for Rx. This patch enables TPACKET_V2 compatible Tx features in TPACKET_V3 so that an application can use a single descriptor to get the benefits of _v3 RX_RING and _v2 TX_RING. An application may do a block-send by first filling up multiple frames in the Tx ring and then triggering a transmit. This patch only supports fixed size Tx frames for TPACKET_V3, and requires that tp_next_offset be zero. Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									e7072f6669
								
							
						
					
					
						commit
						7f953ab2ba
					
				
					 2 changed files with 37 additions and 11 deletions
				
			
		| 
						 | 
					@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2:
 | 
				
			||||||
		   (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
 | 
							   (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TPACKET_V2 --> TPACKET_V3:
 | 
					TPACKET_V2 --> TPACKET_V3:
 | 
				
			||||||
	- Flexible buffer implementation:
 | 
						- Flexible buffer implementation for RX_RING:
 | 
				
			||||||
		1. Blocks can be configured with non-static frame-size
 | 
							1. Blocks can be configured with non-static frame-size
 | 
				
			||||||
		2. Read/poll is at a block-level (as opposed to packet-level)
 | 
							2. Read/poll is at a block-level (as opposed to packet-level)
 | 
				
			||||||
		3. Added poll timeout to avoid indefinite user-space wait
 | 
							3. Added poll timeout to avoid indefinite user-space wait
 | 
				
			||||||
| 
						 | 
					@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3:
 | 
				
			||||||
			4.1 block::timeout
 | 
								4.1 block::timeout
 | 
				
			||||||
			4.2 tpkt_hdr::sk_rxhash
 | 
								4.2 tpkt_hdr::sk_rxhash
 | 
				
			||||||
	- RX Hash data available in user space
 | 
						- RX Hash data available in user space
 | 
				
			||||||
	- Currently only RX_RING available
 | 
						- TX_RING semantics are conceptually similar to TPACKET_V2;
 | 
				
			||||||
 | 
						  use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
 | 
				
			||||||
 | 
						  instead of TPACKET2_HDRLEN. In the current implementation,
 | 
				
			||||||
 | 
						  the tp_next_offset field in the tpacket3_hdr MUST be set to
 | 
				
			||||||
 | 
						  zero, indicating that the ring does not hold variable sized frames.
 | 
				
			||||||
 | 
						  Packets with non-zero values of tp_next_offset will be dropped.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-------------------------------------------------------------------------------
 | 
					-------------------------------------------------------------------------------
 | 
				
			||||||
+ AF_PACKET fanout mode
 | 
					+ AF_PACKET fanout mode
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 | 
				
			||||||
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 | 
							flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	case TPACKET_V3:
 | 
						case TPACKET_V3:
 | 
				
			||||||
 | 
							h.h3->tp_status = status;
 | 
				
			||||||
 | 
							flush_dcache_page(pgv_to_page(&h.h3->tp_status));
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
		WARN(1, "TPACKET version not supported.\n");
 | 
							WARN(1, "TPACKET version not supported.\n");
 | 
				
			||||||
		BUG();
 | 
							BUG();
 | 
				
			||||||
| 
						 | 
					@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
 | 
				
			||||||
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 | 
							flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 | 
				
			||||||
		return h.h2->tp_status;
 | 
							return h.h2->tp_status;
 | 
				
			||||||
	case TPACKET_V3:
 | 
						case TPACKET_V3:
 | 
				
			||||||
 | 
							flush_dcache_page(pgv_to_page(&h.h3->tp_status));
 | 
				
			||||||
 | 
							return h.h3->tp_status;
 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
		WARN(1, "TPACKET version not supported.\n");
 | 
							WARN(1, "TPACKET version not supported.\n");
 | 
				
			||||||
		BUG();
 | 
							BUG();
 | 
				
			||||||
| 
						 | 
					@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
 | 
				
			||||||
	ph.raw = frame;
 | 
						ph.raw = frame;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (po->tp_version) {
 | 
						switch (po->tp_version) {
 | 
				
			||||||
 | 
						case TPACKET_V3:
 | 
				
			||||||
 | 
							if (ph.h3->tp_next_offset != 0) {
 | 
				
			||||||
 | 
								pr_warn_once("variable sized slot not supported");
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							tp_len = ph.h3->tp_len;
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
	case TPACKET_V2:
 | 
						case TPACKET_V2:
 | 
				
			||||||
		tp_len = ph.h2->tp_len;
 | 
							tp_len = ph.h2->tp_len;
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
| 
						 | 
					@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
 | 
				
			||||||
		off_max = po->tx_ring.frame_size - tp_len;
 | 
							off_max = po->tx_ring.frame_size - tp_len;
 | 
				
			||||||
		if (po->sk.sk_type == SOCK_DGRAM) {
 | 
							if (po->sk.sk_type == SOCK_DGRAM) {
 | 
				
			||||||
			switch (po->tp_version) {
 | 
								switch (po->tp_version) {
 | 
				
			||||||
 | 
								case TPACKET_V3:
 | 
				
			||||||
 | 
									off = ph.h3->tp_net;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
			case TPACKET_V2:
 | 
								case TPACKET_V2:
 | 
				
			||||||
				off = ph.h2->tp_net;
 | 
									off = ph.h2->tp_net;
 | 
				
			||||||
				break;
 | 
									break;
 | 
				
			||||||
| 
						 | 
					@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			switch (po->tp_version) {
 | 
								switch (po->tp_version) {
 | 
				
			||||||
 | 
								case TPACKET_V3:
 | 
				
			||||||
 | 
									off = ph.h3->tp_mac;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
			case TPACKET_V2:
 | 
								case TPACKET_V2:
 | 
				
			||||||
				off = ph.h2->tp_mac;
 | 
									off = ph.h2->tp_mac;
 | 
				
			||||||
				break;
 | 
									break;
 | 
				
			||||||
| 
						 | 
					@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 | 
				
			||||||
	struct tpacket_req *req = &req_u->req;
 | 
						struct tpacket_req *req = &req_u->req;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	lock_sock(sk);
 | 
						lock_sock(sk);
 | 
				
			||||||
	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
 | 
					 | 
				
			||||||
	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
 | 
					 | 
				
			||||||
		net_warn_ratelimited("Tx-ring is not supported.\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
 | 
						rb = tx_ring ? &po->tx_ring : &po->rx_ring;
 | 
				
			||||||
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 | 
						rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 | 
				
			||||||
| 
						 | 
					@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 | 
				
			||||||
			goto out;
 | 
								goto out;
 | 
				
			||||||
		switch (po->tp_version) {
 | 
							switch (po->tp_version) {
 | 
				
			||||||
		case TPACKET_V3:
 | 
							case TPACKET_V3:
 | 
				
			||||||
		/* Transmit path is not supported. We checked
 | 
								/* Block transmit is not supported yet */
 | 
				
			||||||
		 * it above but just being paranoid
 | 
								if (!tx_ring) {
 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
			if (!tx_ring)
 | 
					 | 
				
			||||||
				init_prb_bdqc(po, rb, pg_vec, req_u);
 | 
									init_prb_bdqc(po, rb, pg_vec, req_u);
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									struct tpacket_req3 *req3 = &req_u->req3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if (req3->tp_retire_blk_tov ||
 | 
				
			||||||
 | 
									    req3->tp_sizeof_priv ||
 | 
				
			||||||
 | 
									    req3->tp_feature_req_word) {
 | 
				
			||||||
 | 
										err = -EINVAL;
 | 
				
			||||||
 | 
										goto out;
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		default:
 | 
							default:
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue