Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 02:30:34 +02:00
	x86/mm: Use INVPCID for __native_flush_tlb_single()
This uses INVPCID to shoot down individual lines of the user mapping
instead of marking the entire user map as invalid. This
could/might/possibly be faster.

This for sure needs tlb_single_page_flush_ceiling to be redetermined;
esp. since INVPCID is _slow_.

A detailed performance analysis is available here:

  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com

[ Peterz: Split out from big combo patch ]

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
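For context: the invpcid_flush_one() call introduced below boils down to a single INVPCID instruction with an "individual address" descriptor, i.e. a 16-byte memory operand carrying the target PCID and linear address plus a register operand selecting invalidation type 0. The helper below is a minimal illustrative sketch of that pattern; the name invpcid_flush_one_sketch() and its exact shape are not taken from this patch, and the descriptor layout is as documented in the Intel SDM.

/* Illustrative sketch, not the kernel's exact helper. */
static inline void invpcid_flush_one_sketch(unsigned long pcid,
					    unsigned long addr)
{
	/*
	 * INVPCID descriptor: PCID in bits 0-11 of the first quadword,
	 * linear address to invalidate in the second quadword.
	 */
	struct { unsigned long long q[2]; } desc = { { pcid, addr } };
	unsigned long type = 0;	/* type 0: one address in one PCID */

	/*
	 * The "memory" clobber keeps the compiler from reordering later
	 * memory accesses before the flush.
	 */
	asm volatile("invpcid %[desc], %[type]"
		     : : [desc] "m" (desc), [type] "r" (type) : "memory");
}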
parent 21e9445911
commit 6cff64b86a

3 changed files with 60 additions and 28 deletions
arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,7 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
arch/x86/include/asm/tlbflush.h
@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid)
 	return asid + 1;
 }
 
+/*
+ * The user PCID is just the kernel one, plus the "switch bit".
+ */
+static inline u16 user_pcid(u16 asid)
+{
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+	return ret;
+}
+
 struct pgd_t;
 static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
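To make the "switch bit" concrete: kern_pcid() maps ASID n to hardware PCID n + 1, and user_pcid() sets one extra high bit in the 12-bit PCID field. Assuming X86_CR3_PTI_SWITCH_BIT is 11 (its value elsewhere in the PTI series, not shown in this patch), a small standalone sketch of the resulting numbering:

#include <stdio.h>

#define X86_CR3_PTI_SWITCH_BIT	11	/* assumed value, defined elsewhere in the PTI series */

static unsigned short kern_pcid(unsigned short asid)
{
	return asid + 1;
}

static unsigned short user_pcid(unsigned short asid)
{
	return kern_pcid(asid) | (1 << X86_CR3_PTI_SWITCH_BIT);
}

int main(void)
{
	/* ASID 0 -> kernel PCID 0x001, user PCID 0x801 */
	printf("asid 0: kern=%#x user=%#x\n", kern_pcid(0), user_pcid(0));
	return 0;
}

Each ASID therefore owns a kernel/user pair of PCIDs differing only in that one bit, which is what lets __native_flush_tlb_single() below target just the user half with INVPCID.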
				
			
@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void)
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 	if (!static_cpu_has(X86_FEATURE_PTI))
 		return;
 
-	invalidate_user_asid(loaded_mm_asid);
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
 /*
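The tlb_single_page_flush_ceiling that the changelog says needs re-measuring is the existing cutoff in the flush_tlb_mm_range() path between flushing page by page and flushing the whole TLB. A simplified sketch of that decision, not verbatim kernel code and not part of this patch, shows why the per-page cost of the new INVPCID path feeds straight into where the cutoff should sit:

/* Simplified sketch of the existing range-flush heuristic. */
static void flush_range_sketch(unsigned long start, unsigned long end)
{
	if (((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
		unsigned long addr;

		/*
		 * Small range: flush page by page; with PTI each call now
		 * also takes the INVPCID path above for the user mapping.
		 */
		for (addr = start; addr < end; addr += PAGE_SIZE)
			__flush_tlb_single(addr);
	} else {
		/* Large range: one full flush is cheaper than many per-page flushes. */
		__flush_tlb();
	}
}

If INVPCID makes each single-page flush more expensive, the break-even point moves toward fewer pages, which is why the changelog insists the ceiling be redetermined.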
				
			
arch/x86/mm/init.c
@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
 
 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-	if (boot_cpu_has(X86_FEATURE_PCID)) {
-		if (boot_cpu_has(X86_FEATURE_PGE)) {
-			/*
-			 * This can't be cr4_set_bits_and_update_boot() --
-			 * the trampoline code can't handle CR4.PCIDE and
-			 * it wouldn't do any good anyway.  Despite the name,
-			 * cr4_set_bits_and_update_boot() doesn't actually
-			 * cause the bits in question to remain set all the
-			 * way through the secondary boot asm.
-			 *
-			 * Instead, we brute-force it and set CR4.PCIDE
-			 * manually in start_secondary().
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			setup_clear_cpu_cap(X86_FEATURE_PCID);
-		}
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
+		/*
+		 * This can't be cr4_set_bits_and_update_boot() -- the
+		 * trampoline code can't handle CR4.PCIDE and it wouldn't
+		 * do any good anyway.  Despite the name,
+		 * cr4_set_bits_and_update_boot() doesn't actually cause
+		 * the bits in question to remain set all the way through
+		 * the secondary boot asm.
+		 *
+		 * Instead, we brute-force it and set CR4.PCIDE manually in
+		 * start_secondary().
+		 */
+		cr4_set_bits(X86_CR4_PCIDE);
+
+		/*
+		 * INVPCID's single-context modes (2/3) only work if we set
+		 * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
+		 * on systems that have X86_CR4_PCIDE clear, or that have
+		 * no INVPCID support at all.
+		 */
+		if (boot_cpu_has(X86_FEATURE_INVPCID))
+			setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+	} else {
+		/*
+		 * flush_tlb_all(), as currently implemented, won't work if
+		 * PCID is on but PGE is not.  Since that combination
+		 * doesn't exist on real hardware, there's no reason to try
+		 * to fully support it, but it's polite to avoid corrupting
+		 * data if we're on an improperly configured VM.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_PCID);
 	}
-#endif
 }
 
 #ifdef CONFIG_X86_32