page_pool: devmem support
Convert netmem to be a union of struct page and struct net_iov. Overload
the LSB of struct netmem* to indicate that it's a net_iov, otherwise
it's a page.
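
As a standalone illustration (not part of this patch), here is a minimal
userspace sketch of that tagging scheme. Because struct page and struct
net_iov pointers are at least word-aligned, bit 0 of the pointer is always
zero and is free to carry the type tag; the stand-in types and main() below
are hypothetical:

/* Userspace sketch of LSB pointer tagging; compile with any C compiler. */
#include <stdio.h>
#include <stdint.h>

#define NET_IOV 0x01UL

struct net_iov { unsigned long pp_magic; };	/* stand-in type */

typedef uintptr_t netmem_ref;

static netmem_ref net_iov_to_netmem(struct net_iov *niov)
{
	return (netmem_ref)niov | NET_IOV;	/* set the tag bit */
}

static int netmem_is_net_iov(netmem_ref netmem)
{
	return (netmem & NET_IOV) != 0;		/* test the tag bit */
}

static struct net_iov *netmem_to_net_iov(netmem_ref netmem)
{
	return (struct net_iov *)(netmem & ~NET_IOV);	/* clear the tag */
}

int main(void)
{
	struct net_iov niov = { .pp_magic = 42 };
	netmem_ref ref = net_iov_to_netmem(&niov);

	printf("is_net_iov=%d pp_magic=%lu round_trip=%d\n",
	       netmem_is_net_iov(ref),
	       netmem_to_net_iov(ref)->pp_magic,
	       netmem_to_net_iov(ref) == &niov);
	return 0;
}
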
Currently these entries in struct page are rented by the page_pool and
used exclusively by the net stack:
struct {
	unsigned long pp_magic;
	struct page_pool *pp;
	unsigned long _pp_mapping_pad;
	unsigned long dma_addr;
	atomic_long_t pp_ref_count;
};
Mirror these (and only these) entries into struct net_iov and implement
netmem helpers that can access these common fields regardless of
whether the underlying type is page or net_iov.
Implement checks for net_iov in netmem helpers which delegate to mm
APIs, to ensure net_iov are never passed to the mm stack.
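
For illustration only, a hedged sketch of how a caller is expected to treat
such a guarded helper; read_from_netmem() is hypothetical and not part of
this patch, while netmem_address() is added below. It assumes
<net/netmem.h>, <linux/errno.h> and <linux/string.h>:

/* netmem_address() returns NULL for net_iov because device memory may
 * have no kernel virtual address, so a caller must bail out instead of
 * touching the memory through mm APIs.
 */
static int read_from_netmem(netmem_ref netmem, void *dst, size_t len)
{
	void *vaddr = netmem_address(netmem);

	if (!vaddr)
		return -EFAULT;	/* net_iov: no kernel mapping */

	memcpy(dst, vaddr, len);
	return 0;
}
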
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20240910171458.219195-6-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
			
			
parent 28c5c74eea
commit 8ab79ed50c

8 changed files with 218 additions and 69 deletions
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -8,12 +8,52 @@
 #ifndef _NET_NETMEM_H
 #define _NET_NETMEM_H
 
+#include <linux/mm.h>
+#include <net/net_debug.h>
+
 /* net_iov */
 
+DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
+/* We overload the LSB of the struct page pointer to indicate whether it's
+ * a page or net_iov.
+ */
+#define NET_IOV 0x01UL
+
 struct net_iov {
+	unsigned long __unused_padding;
+	unsigned long pp_magic;
+	struct page_pool *pp;
 	struct dmabuf_genpool_chunk_owner *owner;
+	unsigned long dma_addr;
+	atomic_long_t pp_ref_count;
 };
 
+/* These fields in struct page are used by the page_pool and net stack:
+ *
+ *        struct {
+ *                unsigned long pp_magic;
+ *                struct page_pool *pp;
+ *                unsigned long _pp_mapping_pad;
+ *                unsigned long dma_addr;
+ *                atomic_long_t pp_ref_count;
+ *        };
+ *
+ * We mirror the page_pool fields here so the page_pool can access these fields
+ * without worrying whether the underlying fields belong to a page or net_iov.
+ *
+ * The non-net stack fields of struct page are private to the mm stack and must
+ * never be mirrored to net_iov.
+ */
+#define NET_IOV_ASSERT_OFFSET(pg, iov)             \
+	static_assert(offsetof(struct page, pg) == \
+		      offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
+NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
+NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
+#undef NET_IOV_ASSERT_OFFSET
+
 /* netmem */
 
 /**
@@ -27,20 +67,37 @@ struct net_iov {
  */
 typedef unsigned long __bitwise netmem_ref;
 
+static inline bool netmem_is_net_iov(const netmem_ref netmem)
+{
+	return (__force unsigned long)netmem & NET_IOV;
+}
+
 /* This conversion fails (returns NULL) if the netmem_ref is not struct page
  * backed.
- *
- * Currently struct page is the only possible netmem, and this helper never
- * fails.
  */
 static inline struct page *netmem_to_page(netmem_ref netmem)
 {
+	if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+		return NULL;
+
 	return (__force struct page *)netmem;
 }
 
-/* Converting from page to netmem is always safe, because a page can always be
- * a netmem.
- */
+static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
+{
+	if (netmem_is_net_iov(netmem))
+		return (struct net_iov *)((__force unsigned long)netmem &
+					  ~NET_IOV);
+
+	DEBUG_NET_WARN_ON_ONCE(true);
+	return NULL;
+}
+
+static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
+{
+	return (__force netmem_ref)((unsigned long)niov | NET_IOV);
+}
+
 static inline netmem_ref page_to_netmem(struct page *page)
 {
 	return (__force netmem_ref)page;
@@ -48,17 +105,70 @@ static inline netmem_ref page_to_netmem(struct page *page)
 
 static inline int netmem_ref_count(netmem_ref netmem)
 {
+	/* The non-pp refcount of net_iov is always 1. On net_iov, we only
+	 * support pp refcounting which uses the pp_ref_count field.
+	 */
+	if (netmem_is_net_iov(netmem))
+		return 1;
+
 	return page_ref_count(netmem_to_page(netmem));
 }
 
-static inline unsigned long netmem_to_pfn(netmem_ref netmem)
+static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
 {
+	if (netmem_is_net_iov(netmem))
+		return 0;
+
 	return page_to_pfn(netmem_to_page(netmem));
 }
 
+static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
+{
+	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
+}
+
+static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
+{
+	return __netmem_clear_lsb(netmem)->pp;
+}
+
+static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
+{
+	return &__netmem_clear_lsb(netmem)->pp_ref_count;
+}
+
+static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
+{
+	/* NUMA node preference only makes sense if we're allocating
+	 * system memory. Memory providers (which give us net_iovs)
+	 * choose for us.
+	 */
+	if (netmem_is_net_iov(netmem))
+		return true;
+
+	return page_to_nid(netmem_to_page(netmem)) == pref_nid;
+}
+
 static inline netmem_ref netmem_compound_head(netmem_ref netmem)
 {
+	/* niov are never compounded */
+	if (netmem_is_net_iov(netmem))
+		return netmem;
+
 	return page_to_netmem(compound_head(netmem_to_page(netmem)));
 }
 
+static inline void *netmem_address(netmem_ref netmem)
+{
+	if (netmem_is_net_iov(netmem))
+		return NULL;
+
+	return page_address(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
+{
+	return __netmem_clear_lsb(netmem)->dma_addr;
+}
+
 #endif /* _NET_NETMEM_H */
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -216,7 +216,7 @@ page_pool_get_dma_dir(const struct page_pool *pool)
 
 static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
 {
-	atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
+	atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr);
 }
 
 /**
@@ -244,7 +244,7 @@ static inline void page_pool_fragment_page(struct page *page, long nr)
 
 static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
 {
-	struct page *page = netmem_to_page(netmem);
+	atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem);
 	long ret;
 
 	/* If nr == pp_ref_count then we have cleared all remaining
@@ -261,19 +261,19 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
 	 * initially, and only overwrite it when the page is partitioned into
 	 * more than one piece.
 	 */
-	if (atomic_long_read(&page->pp_ref_count) == nr) {
+	if (atomic_long_read(pp_ref_count) == nr) {
 		/* As we have ensured nr is always one for constant case using
 		 * the BUILD_BUG_ON(), only need to handle the non-constant case
 		 * here for pp_ref_count draining, which is a rare case.
 		 */
 		BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
 		if (!__builtin_constant_p(nr))
-			atomic_long_set(&page->pp_ref_count, 1);
+			atomic_long_set(pp_ref_count, 1);
 
 		return 0;
 	}
 
-	ret = atomic_long_sub_return(nr, &page->pp_ref_count);
+	ret = atomic_long_sub_return(nr, pp_ref_count);
 	WARN_ON(ret < 0);
 
 	/* We are the last user here too, reset pp_ref_count back to 1 to
@@ -282,7 +282,7 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
 	 * page_pool_unref_page() currently.
 	 */
 	if (unlikely(!ret))
-		atomic_long_set(&page->pp_ref_count, 1);
+		atomic_long_set(pp_ref_count, 1);
 
 	return ret;
 }
@@ -401,9 +401,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
 
 static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
 {
-	struct page *page = netmem_to_page(netmem);
-
-	dma_addr_t ret = page->dma_addr;
+	dma_addr_t ret = netmem_get_dma_addr(netmem);
 
 	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
 		ret <<= PAGE_SHIFT;
@@ -423,24 +421,6 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
 	return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
 }
 
-static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
-						 dma_addr_t addr)
-{
-	struct page *page = netmem_to_page(netmem);
-
-	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
-		page->dma_addr = addr >> PAGE_SHIFT;
-
-		/* We assume page alignment to shave off bottom bits,
-		 * if this "compression" doesn't work we need to drop.
-		 */
-		return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
-	}
-
-	page->dma_addr = addr;
-	return false;
-}
-
 /**
  * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW
  * @pool: &page_pool the @page belongs to
@@ -463,11 +443,6 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
 				      page_pool_get_dma_dir(pool));
 }
 
-static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
-{
-	return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
-}
-
 static inline bool page_pool_put(struct page_pool *pool)
 {
 	return refcount_dec_and_test(&pool->user_cnt);
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -57,12 +57,12 @@ TRACE_EVENT(page_pool_state_release,
 		__entry->pool		= pool;
 		__entry->netmem		= (__force unsigned long)netmem;
 		__entry->release	= release;
-		__entry->pfn		= netmem_to_pfn(netmem);
+		__entry->pfn		= netmem_pfn_trace(netmem);
 	),
 
-	TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u",
+	TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u",
 		  __entry->pool, (void *)__entry->netmem,
-		  __entry->pfn, __entry->release)
+		  __entry->netmem & NET_IOV, __entry->pfn, __entry->release)
 );
 
 TRACE_EVENT(page_pool_state_hold,
@@ -83,12 +83,12 @@ TRACE_EVENT(page_pool_state_hold,
 		__entry->pool	= pool;
 		__entry->netmem	= (__force unsigned long)netmem;
 		__entry->hold	= hold;
-		__entry->pfn	= netmem_to_pfn(netmem);
+		__entry->pfn	= netmem_pfn_trace(netmem);
 	),
 
-	TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u",
+	TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u",
 		  __entry->pool, (void *)__entry->netmem,
-		  __entry->pfn, __entry->hold)
+		  __entry->netmem & NET_IOV, __entry->pfn, __entry->hold)
 );
 
 TRACE_EVENT(page_pool_update_nid,
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -18,6 +18,7 @@
 #include <trace/events/page_pool.h>
 
 #include "devmem.h"
+#include "page_pool_priv.h"
 
 /* Device memory support */
 
@@ -82,6 +83,10 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
 	index = offset / PAGE_SIZE;
 	niov = &owner->niovs[index];
 
+	niov->pp_magic = 0;
+	niov->pp = NULL;
+	atomic_long_set(&niov->pp_ref_count, 0);
+
 	return niov;
 }
 
@@ -269,6 +274,8 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
 		for (i = 0; i < owner->num_niovs; i++) {
 			niov = &owner->niovs[i];
 			niov->owner = owner;
+			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
+						      net_devmem_get_dma_addr(niov));
 		}
 
 		virtual += len;
--- /dev/null
+++ b/net/core/netmem_priv.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NETMEM_PRIV_H
+#define __NETMEM_PRIV_H
+
+static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+{
+	return __netmem_clear_lsb(netmem)->pp_magic;
+}
+
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+	__netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
+{
+	__netmem_clear_lsb(netmem)->pp_magic = 0;
+}
+
+static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
+{
+	__netmem_clear_lsb(netmem)->pp = pool;
+}
+
+static inline void netmem_set_dma_addr(netmem_ref netmem,
+				       unsigned long dma_addr)
+{
+	__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
+}
+#endif
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -24,8 +24,11 @@
 
 #include <trace/events/page_pool.h>
 
+#include "netmem_priv.h"
 #include "page_pool_priv.h"
 
+DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
@@ -358,7 +361,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
 		if (unlikely(!netmem))
 			break;
 
-		if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+		if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
 			pool->alloc.cache[pool->alloc.count++] = netmem;
 		} else {
 			/* NUMA mismatch;
@@ -454,10 +457,8 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
 
 static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
 {
-	struct page *page = netmem_to_page(netmem);
-
-	page->pp = pool;
-	page->pp_magic |= PP_SIGNATURE;
+	netmem_set_pp(netmem, pool);
+	netmem_or_pp_magic(netmem, PP_SIGNATURE);
 
 	/* Ensuring all pages have been split into one fragment initially:
 	 * page_pool_set_pp_info() is only called once for every page when it
@@ -472,10 +473,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
 
 static void page_pool_clear_pp_info(netmem_ref netmem)
 {
-	struct page *page = netmem_to_page(netmem);
-
-	page->pp_magic = 0;
-	page->pp = NULL;
+	netmem_clear_pp_magic(netmem);
+	netmem_set_pp(netmem, NULL);
 }
 
 static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
@@ -692,8 +691,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem,
 
 static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
 {
-	return page_ref_count(netmem_to_page(netmem)) == 1 &&
-	       !page_is_pfmemalloc(netmem_to_page(netmem));
+	return netmem_is_net_iov(netmem) ||
+	       (page_ref_count(netmem_to_page(netmem)) == 1 &&
+		!page_is_pfmemalloc(netmem_to_page(netmem)));
 }
 
 /* If the page refcnt == 1, this will try to recycle the page.
@@ -728,6 +728,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
 		/* Page found as candidate for recycling */
 		return netmem;
 	}
+
 	/* Fallback/non-XDP mode: API user have elevated refcnt.
 	 *
 	 * Many drivers split up the page into fragments, and some
@@ -949,7 +950,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
 	/* Empty recycle ring */
 	while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
 		/* Verify the refcnt invariant of cached pages */
-		if (!(page_ref_count(netmem_to_page(netmem)) == 1))
+		if (!(netmem_ref_count(netmem) == 1))
 			pr_crit("%s() page_pool refcnt %d violation\n",
 				__func__, netmem_ref_count(netmem));
 
--- a/net/core/page_pool_priv.h
+++ b/net/core/page_pool_priv.h
@@ -3,10 +3,36 @@
 #ifndef __PAGE_POOL_PRIV_H
 #define __PAGE_POOL_PRIV_H
 
+#include <net/page_pool/helpers.h>
+
+#include "netmem_priv.h"
+
 s32 page_pool_inflight(const struct page_pool *pool, bool strict);
 
 int page_pool_list(struct page_pool *pool);
 void page_pool_detached(struct page_pool *pool);
 void page_pool_unlist(struct page_pool *pool);
 
+static inline bool
+page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr)
+{
+	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+		netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT);
+
+		/* We assume page alignment to shave off bottom bits,
+		 * if this "compression" doesn't work we need to drop.
+		 */
+		return addr != (dma_addr_t)netmem_get_dma_addr(netmem)
+				       << PAGE_SHIFT;
+	}
+
+	netmem_set_dma_addr(netmem, addr);
+	return false;
+}
+
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
 #endif
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -88,6 +88,7 @@
 #include <linux/textsearch.h>
 
 #include "dev.h"
+#include "netmem_priv.h"
 #include "sock_destructor.h"
 
 #ifdef CONFIG_SKB_EXTENSIONS
@@ -920,9 +921,9 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 		skb_get(list);
 }
 
-static bool is_pp_page(struct page *page)
+static bool is_pp_netmem(netmem_ref netmem)
 {
-	return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+	return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
 }
 
 int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
@@ -1020,9 +1021,7 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp);
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 bool napi_pp_put_page(netmem_ref netmem)
 {
-	struct page *page = netmem_to_page(netmem);
-
-	page = compound_head(page);
+	netmem = netmem_compound_head(netmem);
 
 	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
 	 * in order to preserve any existing bits, such as bit 0 for the
@@ -1031,10 +1030,10 @@ bool napi_pp_put_page(netmem_ref netmem)
 	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
 	 * to avoid recycling the pfmemalloc page.
 	 */
-	if (unlikely(!is_pp_page(page)))
+	if (unlikely(!is_pp_netmem(netmem)))
 		return false;
 
-	page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
+	page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
 
 	return true;
 }
@@ -1061,7 +1060,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 static int skb_pp_frag_ref(struct sk_buff *skb)
 {
 	struct skb_shared_info *shinfo;
-	struct page *head_page;
+	netmem_ref head_netmem;
 	int i;
 
 	if (!skb->pp_recycle)
@@ -1070,11 +1069,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
 	shinfo = skb_shinfo(skb);
 
 	for (i = 0; i < shinfo->nr_frags; i++) {
-		head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
-		if (likely(is_pp_page(head_page)))
-			page_pool_ref_page(head_page);
+		head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
+		if (likely(is_pp_netmem(head_netmem)))
+			page_pool_ref_netmem(head_netmem);
 		else
-			page_ref_inc(head_page);
+			page_ref_inc(netmem_to_page(head_netmem));
 	}
 	return 0;
 }