mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	A profile of a network benchmark showed iommu_num_pages rather high up:
     0.52%  iommu_num_pages
Looking at the profile, an integer divide is taking almost all of the time:
      %
         :      c000000000376ea4 <.iommu_num_pages>:
    1.93 :      c000000000376ea4:       fb e1 ff f8     std     r31,-8(r1)
    0.00 :      c000000000376ea8:       f8 21 ff c1     stdu    r1,-64(r1)
    0.00 :      c000000000376eac:       7c 3f 0b 78     mr      r31,r1
    3.86 :      c000000000376eb0:       38 84 ff ff     addi    r4,r4,-1
    0.00 :      c000000000376eb4:       38 05 ff ff     addi    r0,r5,-1
    0.00 :      c000000000376eb8:       7c 84 2a 14     add     r4,r4,r5
   46.95 :      c000000000376ebc:       7c 00 18 38     and     r0,r0,r3
   45.66 :      c000000000376ec0:       7c 84 02 14     add     r4,r4,r0
    0.00 :      c000000000376ec4:       7c 64 2b 92     divdu   r3,r4,r5
    0.00 :      c000000000376ec8:       38 3f 00 40     addi    r1,r31,64
    0.00 :      c000000000376ecc:       eb e1 ff f8     ld      r31,-8(r1)
    1.61 :      c000000000376ed0:       4e 80 00 20     blr
Since every caller of iommu_num_pages passes in a constant power of two
we can inline this such that the divide is replaced by a shift. The
entire function is only a few instructions once optimised, so it is
a good candidate for inlining overall.
Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			40 lines
		
	
	
	
		
			1,006 B
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			40 lines
		
	
	
	
		
			1,006 B
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * IOMMU helper functions for the free area management
 */

#include <linux/module.h>
#include <linux/bitmap.h>

/*
 * iommu_is_span_boundary - test whether a candidate mapping crosses a
 * boundary-aligned segment.
 * @index: first IOMMU page of the candidate mapping
 * @nr: number of IOMMU pages the mapping needs
 * @shift: offset added so boundaries are computed in the bus address space
 * @boundary_size: segment size in IOMMU pages; must be a power of two
 *
 * Returns non-zero when [index, index + nr) would straddle a
 * boundary_size-aligned segment, zero when it fits within one segment.
 */
int iommu_is_span_boundary(unsigned int index, unsigned int nr,
			   unsigned long shift,
			   unsigned long boundary_size)
{
	unsigned long offset;

	BUG_ON(!is_power_of_2(boundary_size));

	/* Position of the mapping's start within its segment. */
	offset = (shift + index) & (boundary_size - 1);

	return offset + nr > boundary_size;
}
 | 
						|
 | 
						|
/*
 * iommu_area_alloc - find and reserve a run of free IOMMU pages.
 * @map: allocation bitmap (one bit per IOMMU page)
 * @size: total number of IOMMU pages covered by @map
 * @start: first index to consider
 * @nr: number of contiguous pages required
 * @shift: bus-address offset passed through to the boundary check
 * @boundary_size: segment size the allocation must not cross (power of two)
 * @align_mask: alignment constraint for the first page of the run
 *
 * Returns the index of the reserved run, or -1 (as unsigned long) when no
 * suitable run exists.
 */
unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
			       unsigned long start, unsigned int nr,
			       unsigned long shift, unsigned long boundary_size,
			       unsigned long align_mask)
{
	unsigned long index;

	/* We don't want the last of the limit */
	size -= 1;

	for (;;) {
		index = bitmap_find_next_zero_area(map, size, start, nr,
						   align_mask);
		if (index >= size)
			return -1;
		if (!iommu_is_span_boundary(index, nr, shift, boundary_size))
			break;
		/* Run straddles a segment boundary; resume the search
		 * just past this candidate. (A smarter restart point
		 * would skip to the next segment.) */
		start = index + 1;
	}

	bitmap_set(map, index, nr);
	return index;
}
EXPORT_SYMBOL(iommu_area_alloc);
 |