mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and this sequence is strongly recommended to be used by the ARM
architecture manual (section A.4.1.1).
We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.
Rather than doing this piecemeal, and missing some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.
Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
		
	
			
		
			
				
	
	
		
			250 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			250 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/*
 | 
						|
 *  linux/arch/arm/mm/cache-fa.S
 | 
						|
 *
 | 
						|
 *  Copyright (C) 2005 Faraday Corp.
 | 
						|
 *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
 | 
						|
 *
 | 
						|
 * Based on cache-v4wb.S:
 | 
						|
 *  Copyright (C) 1997-2002 Russell King
 | 
						|
 *
 | 
						|
 * This program is free software; you can redistribute it and/or modify
 | 
						|
 * it under the terms of the GNU General Public License version 2 as
 | 
						|
 * published by the Free Software Foundation.
 | 
						|
 *
 | 
						|
 *  Processors: FA520 FA526 FA626	
 | 
						|
 */
 | 
						|
#include <linux/linkage.h>
 | 
						|
#include <linux/init.h>
 | 
						|
#include <asm/assembler.h>
 | 
						|
#include <asm/memory.h>
 | 
						|
#include <asm/page.h>
 | 
						|
 | 
						|
#include "proc-macros.S"
 | 
						|
 | 
						|
/*
 | 
						|
 * The size of one data cache line.
 | 
						|
 */
 | 
						|
#define CACHE_DLINESIZE	16	/* bytes; address stride of the per-line loops below */
 | 
						|
 | 
						|
/*
 | 
						|
 * The total size of the data cache, in bytes.
 | 
						|
 */
 | 
						|
#ifdef CONFIG_ARCH_GEMINI
 | 
						|
#define CACHE_DSIZE	8192	/* 8 KiB when built for CONFIG_ARCH_GEMINI */
 | 
						|
#else /* !CONFIG_ARCH_GEMINI */
 | 
						|
#define CACHE_DSIZE	16384	/* 16 KiB otherwise */
 | 
						|
#endif /* CONFIG_ARCH_GEMINI */
 | 
						|
 | 
						|
/* FIXME: put the optimal value here. The current one is just an estimate. */
 | 
						|
#define CACHE_DLIMIT	(CACHE_DSIZE * 2)	/* range size (bytes) at or above which fa_flush_user_cache_range flushes the whole cache */
 | 
						|
 | 
						|
/*
 | 
						|
 *	flush_icache_all()
 | 
						|
 *
 | 
						|
 *	Unconditionally invalidate the entire icache.
 | 
						|
 */
 | 
						|
ENTRY(fa_flush_icache_all)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand written by the mcr below
 | 
						|
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 | 
						|
	ret	lr				@ return to caller; r0 is left as 0
 | 
						|
ENDPROC(fa_flush_icache_all)
 | 
						|
 | 
						|
/*
 | 
						|
 *	flush_user_cache_all()
 | 
						|
 *
 | 
						|
 *	Clean and invalidate all cache entries in a particular address
 | 
						|
 *	space.
 | 
						|
 */
 | 
						|
ENTRY(fa_flush_user_cache_all)
 | 
						|
	/* FALLTHROUGH into fa_flush_kern_cache_all: both entries share one body */
 | 
						|
/*
 | 
						|
 *	flush_kern_cache_all()
 | 
						|
 *
 | 
						|
 *	Clean and invalidate the entire cache.
 | 
						|
 */
 | 
						|
ENTRY(fa_flush_kern_cache_all)
 | 
						|
	mov	ip, #0				@ ip = 0: operand for every mcr below
 | 
						|
	mov	r2, #VM_EXEC			@ force the I-side ops below to run on this path
 | 
						|
__flush_whole_cache:				@ also reached from fa_flush_user_cache_range (ip = 0, r2 = flags)
 | 
						|
	mcr	p15, 0, ip, c7, c14, 0		@ clean/invalidate D cache
 | 
						|
	tst	r2, #VM_EXEC			@ NE when VM_EXEC is set in r2
 | 
						|
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 | 
						|
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
 | 
						|
	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
 | 
						|
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
 | 
						|
	ret	lr				@ return to caller
 | 
						|
ENDPROC(fa_flush_user_cache_all)
 | 
						|
ENDPROC(fa_flush_kern_cache_all)
 | 
						|
 | 
						|
/*
 | 
						|
 *	flush_user_cache_range(start, end, flags)
 | 
						|
 *
 | 
						|
 *	Clean and invalidate a range of cache entries in the specified
 | 
						|
 *	address space.
 | 
						|
 *
 | 
						|
 *	- start - start address (inclusive, page aligned)
 | 
						|
 *	- end	- end address (exclusive, page aligned)
 | 
						|
 *	- flags	- vm_area_struct flags describing address space
 | 
						|
 */
 | 
						|
ENTRY(fa_flush_user_cache_range)
 | 
						|
	mov	ip, #0				@ ip = 0: operand for the whole-cache and barrier ops
 | 
						|
	sub	r3, r1, r0			@ calculate total size
 | 
						|
	cmp	r3, #CACHE_DLIMIT		@ total size >= limit?
 | 
						|
	bhs	__flush_whole_cache		@ flush whole D cache; that path returns to our caller
 | 
						|
 | 
						|
1:	tst	r2, #VM_EXEC			@ re-test each pass: the cmp below overwrites the flags
 | 
						|
	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I line
 | 
						|
	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached end (exclusive)?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	tst	r2, #VM_EXEC			@ flags from the loop cmp are stale, test again
 | 
						|
	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
 | 
						|
	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
 | 
						|
	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
 | 
						|
	ret	lr				@ return to caller
 | 
						|
ENDPROC(fa_flush_user_cache_range)
 | 
						|
 | 
						|
/*
 | 
						|
 *	coherent_kern_range(start, end)
 | 
						|
 *
 | 
						|
 *	Ensure coherency between the Icache and the Dcache in the
 | 
						|
 *	region described by start, end.  If you have non-snooping
 | 
						|
 *	Harvard caches, you need to implement this function.
 | 
						|
 *
 | 
						|
 *	- start  - virtual start address
 | 
						|
 *	- end	 - virtual end address
 | 
						|
 */
 | 
						|
ENTRY(fa_coherent_kern_range)
 | 
						|
	/* fall through into fa_coherent_user_range: both entries share one body */
 | 
						|
 | 
						|
/*
 | 
						|
 *	coherent_user_range(start, end)
 | 
						|
 *
 | 
						|
 *	Ensure coherency between the Icache and the Dcache in the
 | 
						|
 *	region described by start, end.  If you have non-snooping
 | 
						|
 *	Harvard caches, you need to implement this function.
 | 
						|
 *
 | 
						|
 *	- start  - virtual start address
 | 
						|
 *	- end	 - virtual end address
 | 
						|
 */
 | 
						|
ENTRY(fa_coherent_user_range)
 | 
						|
	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down to a cache line boundary
 | 
						|
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
 | 
						|
	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached end?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand for the ops below, and left in r0 on return
 | 
						|
	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
 | 
						|
	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 | 
						|
	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
 | 
						|
	ret	lr				@ return to caller
 | 
						|
ENDPROC(fa_coherent_user_range)
 | 
						|
ENDPROC(fa_coherent_kern_range)
 | 
						|
 | 
						|
/*
 | 
						|
 *	flush_kern_dcache_area(void *addr, size_t size)
 | 
						|
 *
 | 
						|
 *	Ensure that the data held in the page kaddr is written back
 | 
						|
 *	to the page in question.
 | 
						|
 *
 | 
						|
 *	- addr	- kernel address
 | 
						|
 *	- size	- size of region
 | 
						|
 */
 | 
						|
ENTRY(fa_flush_kern_dcache_area)
 | 
						|
	add	r1, r0, r1			@ r1 = addr + size = end address (exclusive)
 | 
						|
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached end?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand for the ops below
 | 
						|
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
 | 
						|
	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 | 
						|
	ret	lr				@ return to caller
 | 
						|
ENDPROC(fa_flush_kern_dcache_area)
 | 
						|
 | 
						|
/*
 | 
						|
 *	dma_inv_range(start, end)
 | 
						|
 *
 | 
						|
 *	Invalidate (discard) the specified virtual address range.
 | 
						|
 *	May not write back any entries.  If 'start' or 'end'
 | 
						|
 *	are not cache line aligned, those lines must be written
 | 
						|
 *	back.
 | 
						|
 *
 | 
						|
 *	- start  - virtual start address
 | 
						|
 *	- end	 - virtual end address
 | 
						|
 */
 | 
						|
fa_dma_inv_range:
 | 
						|
	tst	r0, #CACHE_DLINESIZE - 1	@ NE when start is not line aligned
 | 
						|
	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down; bic leaves the tst flags intact
 | 
						|
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
 | 
						|
	tst	r1, #CACHE_DLINESIZE - 1	@ NE when end is not line aligned
 | 
						|
	bic	r1, r1, #CACHE_DLINESIZE - 1	@ round end down; bic leaves the tst flags intact
 | 
						|
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D entry
 | 
						|
1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached the rounded-down end?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand for the op below
 | 
						|
	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 | 
						|
	ret	lr				@ return to caller
 | 
						|
 | 
						|
/*
 | 
						|
 *	dma_clean_range(start, end)
 | 
						|
 *
 | 
						|
 *	Clean (write back) the specified virtual address range.
 | 
						|
 *
 | 
						|
 *	- start  - virtual start address
 | 
						|
 *	- end	 - virtual end address
 | 
						|
 */
 | 
						|
fa_dma_clean_range:
 | 
						|
	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down to a cache line boundary
 | 
						|
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached end?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand for the op below
 | 
						|
	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 | 
						|
	ret	lr				@ return to caller
 | 
						|
 | 
						|
/*
 | 
						|
 *	dma_flush_range(start,end)
 | 
						|
 *	- start   - virtual start address of region
 | 
						|
 *	- end     - virtual end address of region
 | 
						|
 */
 | 
						|
ENTRY(fa_dma_flush_range)
 | 
						|
	bic	r0, r0, #CACHE_DLINESIZE - 1	@ round start down to a cache line boundary
 | 
						|
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
 | 
						|
	add	r0, r0, #CACHE_DLINESIZE	@ advance to the next cache line
 | 
						|
	cmp	r0, r1				@ reached end?
 | 
						|
	blo	1b				@ loop while r0 < r1 (unsigned)
 | 
						|
	mov	r0, #0				@ r0 = 0: operand for the op below
 | 
						|
	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 | 
						|
	ret	lr				@ return to caller
 | 
						|
ENDPROC(fa_dma_flush_range)
 | 
						|
 | 
						|
/*
 | 
						|
 *	dma_map_area(start, size, dir)
 | 
						|
 *	- start	- kernel virtual start address
 | 
						|
 *	- size	- size of region
 | 
						|
 *	- dir	- DMA direction
 | 
						|
 */
 | 
						|
ENTRY(fa_dma_map_area)
 | 
						|
	add	r1, r1, r0			@ r1 = start + size = end address for the range helpers
 | 
						|
	cmp	r2, #DMA_TO_DEVICE		@ one compare drives the three-way dispatch below
 | 
						|
	beq	fa_dma_clean_range		@ dir == DMA_TO_DEVICE: clean only
 | 
						|
	bcs	fa_dma_inv_range		@ dir > DMA_TO_DEVICE (unsigned): invalidate
 | 
						|
	b	fa_dma_flush_range		@ dir < DMA_TO_DEVICE: clean & invalidate
 | 
						|
ENDPROC(fa_dma_map_area)
 | 
						|
 | 
						|
/*
 | 
						|
 *	dma_unmap_area(start, size, dir)
 | 
						|
 *	- start	- kernel virtual start address
 | 
						|
 *	- size	- size of region
 | 
						|
 *	- dir	- DMA direction
 | 
						|
 */
 | 
						|
ENTRY(fa_dma_unmap_area)
 | 
						|
	ret	lr				@ nothing to do on unmap: return immediately
 | 
						|
ENDPROC(fa_dma_unmap_area)
 | 
						|
 | 
						|
	.globl	fa_flush_kern_cache_louis	@ export the alias defined on the next line
 | 
						|
	.equ	fa_flush_kern_cache_louis, fa_flush_kern_cache_all	@ louis variant is the full flush
 | 
						|
 | 
						|
	__INITDATA				@ NOTE(review): presumably selects the init data section - confirm in <linux/init.h>
 | 
						|
 | 
						|
	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S),
	@ built from the fa_* routines in this file via the name prefix passed below
 | 
						|
	define_cache_functions fa
 |