	i40e/i40evf: enable hardware feature head write back
The hardware supports a feature to avoid updating the descriptor ring by
marking each descriptor with a DD bit, and instead writes a memory
location with an update to where the driver should clean up to. Enable
this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
This commit is contained in:

parent 6c167f582e
commit 1943d8ba95

4 changed files with 88 additions and 12 deletions
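Before the per-file hunks, a rough model of the mechanism being enabled may help. The sketch below is illustrative only, with hypothetical names (toy_tx_ring, toy_get_head, toy_clean are not the driver's identifiers): the hardware is told to DMA-write the index of the most recently completed Tx descriptor into a u32 placed just past the end of the descriptor ring, so the cleanup path compares against that single value instead of testing a DD (descriptor done) bit in every descriptor.

/* Illustrative model of Tx head write-back (hypothetical names, not the
 * i40e driver's structures). The ring is allocated with one extra u32
 * after the last descriptor; hardware writes the consumed index there.
 */
#include <stdint.h>
#include <stddef.h>

struct toy_tx_ring {
	void    *desc;          /* base of the descriptor ring */
	uint32_t count;         /* number of descriptors */
	size_t   desc_size;     /* size of one hardware descriptor */
	uint32_t next_to_clean; /* driver's cleanup cursor */
};

/* Head write-back slot: the u32 immediately after the last descriptor.
 * The real driver reads it as a volatile __le32 and converts with
 * le32_to_cpu(); host endianness is assumed here for brevity.
 */
static uint32_t toy_get_head(const struct toy_tx_ring *r)
{
	const volatile uint32_t *head = (const volatile uint32_t *)
		((const char *)r->desc + (size_t)r->count * r->desc_size);

	return *head;
}

/* Cleanup: every descriptor strictly before 'head' is known complete,
 * so no per-descriptor done-bit test is needed.
 */
static void toy_clean(struct toy_tx_ring *r)
{
	uint32_t head = toy_get_head(r);

	while (r->next_to_clean != head) {
		/* unmap/free the buffer at next_to_clean here */
		r->next_to_clean = (r->next_to_clean + 1) % r->count;
	}
}

The hunks below wire up the same three pieces in the real driver: program the write-back address into the Tx queue context, reserve the extra u32 when allocating the ring, and switch the clean loop from DD-bit tests to a head comparison.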
				
			
drivers/net/ethernet/intel/i40e/i40e_main.c

@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
 					       I40E_FLAG_FD_ATR_ENABLED));
 	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
 
 	/* As part of VSI creation/update, FW allocates certain
 	 * Tx arbitration queue sets for each TC enabled for
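The head_wb_addr programmed above is ring->dma plus ring->count descriptors, i.e. the first byte past the ring, which is exactly the u32 that the i40e_setup_tx_descriptors hunk below reserves. A hedged worked example of the arithmetic, assuming the 16-byte struct i40e_tx_desc and an illustrative 512-descriptor ring (the DMA base and count are made up):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	const uint64_t ring_dma  = 0x100000; /* illustrative DMA base */
	const uint32_t count     = 512;      /* illustrative ring size */
	const size_t   desc_size = 16;       /* sizeof(struct i40e_tx_desc) */

	/* head_wb_addr: one byte past the last descriptor */
	uint64_t head_wb_addr = ring_dma + (uint64_t)count * desc_size;

	/* allocation: descriptors + u32 head slot, rounded up to 4K */
	size_t size = count * desc_size;     /* 8192 */
	size += sizeof(uint32_t);            /* 8196 */
	size = ALIGN_UP(size, 4096);         /* 12288 */

	printf("head_wb_addr=%#llx size=%zu\n",
	       (unsigned long long)head_wb_addr, size);
	return 0;
}

With these numbers the extra u32 pushes an exactly 8 KiB ring up to the next 4 KiB boundary, so the write-back slot always lands inside memory the allocation itself owns.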
				
			
			
drivers/net/ethernet/intel/i40e/i40e_txrx.c

@@ -618,6 +618,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring:  tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring:  tx ring to clean
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
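With head write-back enabled, the i40e_tx_map hunk above no longer requests an RS (report status) writeback for every packet: the full I40E_TXD_CMD (EOP plus RS) is kept only when the packet's last descriptor sits on the fourth slot of a WB_STRIDE group, or when the packet spans more than one such group; otherwise the descriptor carries EOP alone and completion is observed via the head value. A small restatement of that decision using ring indices instead of the driver's tx_bi pointer comparisons (illustrative only, and it assumes the packet does not wrap the ring):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative re-statement of the WB_STRIDE test above. 'first' and
 * 'last' are the ring indices of the packet's first and last
 * descriptors.
 */
#define WB_STRIDE 0x3

static bool needs_rs_bit(uint32_t first, uint32_t last)
{
	/* skip RS only if the last descriptor is not the 4th of its
	 * group and the whole packet sits inside one 4-descriptor group
	 */
	bool within_group = (first >= (last & ~(uint32_t)WB_STRIDE)) &&
			    (first <= last);

	if (((last & WB_STRIDE) != WB_STRIDE) && within_group)
		return false; /* EOP only; head write-back reports it */

	return true;          /* EOP + RS: force a completion report */
}

Per the comment in the hunk, the intent is to bound how often completions must be reported for packets that span a 64 B cacheline of four 16-byte descriptors.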
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = info->dma_ring_addr +
+			      (info->ring_len * sizeof(struct i40e_tx_desc));
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
drivers/net/ethernet/intel/i40evf/i40e_txrx.c

@@ -169,6 +169,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring:  tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring:  tx ring to clean
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+					 I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),