mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-03 18:20:25 +02:00 
			
		
		
		
	KVM: Pin (as in FOLL_PIN) pages during kvm_vcpu_map()
Pin, as in FOLL_PIN, pages when mapping them for direct access by KVM.
As per Documentation/core-api/pin_user_pages.rst, writing to a page that
was gotten via FOLL_GET is explicitly disallowed.
  Correct (uses FOLL_PIN calls):
      pin_user_pages()
      write to the data within the pages
      unpin_user_pages()
  INCORRECT (uses FOLL_GET calls):
      get_user_pages()
      write to the data within the pages
      put_page()
Unfortunately, FOLL_PIN is a "private" flag, and so kvm_follow_pfn must
use a one-off bool instead of being able to piggyback the "flags" field.
Link: https://lwn.net/Articles/930667
Link: https://lore.kernel.org/all/cover.1683044162.git.lstoakes@gmail.com
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-32-seanjc@google.com>
			
			
This commit is contained in:
		
							parent
							
								
									2ff072ba7a
								
							
						
					
					
						commit
						2bcb52a360
					
				
					 3 changed files with 47 additions and 16 deletions
				
			
		| 
						 | 
				
			
			@ -280,7 +280,7 @@ struct kvm_host_map {
 | 
			
		|||
	 * can be used as guest memory but they are not managed by host
 | 
			
		||||
	 * kernel).
 | 
			
		||||
	 */
 | 
			
		||||
	struct page *refcounted_page;
 | 
			
		||||
	struct page *pinned_page;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	void *hva;
 | 
			
		||||
	kvm_pfn_t pfn;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2814,9 +2814,12 @@ static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
 | 
			
		|||
	 */
 | 
			
		||||
	if (map) {
 | 
			
		||||
		pfn = map->pfn;
 | 
			
		||||
		page = kvm_pfn_to_refcounted_page(pfn);
 | 
			
		||||
		if (page && !get_page_unless_zero(page))
 | 
			
		||||
			return KVM_PFN_ERR_FAULT;
 | 
			
		||||
 | 
			
		||||
		if (!kfp->pin) {
 | 
			
		||||
			page = kvm_pfn_to_refcounted_page(pfn);
 | 
			
		||||
			if (page && !get_page_unless_zero(page))
 | 
			
		||||
				return KVM_PFN_ERR_FAULT;
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		pfn = page_to_pfn(page);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -2834,16 +2837,24 @@ static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
 | 
			
		|||
static bool hva_to_pfn_fast(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
 | 
			
		||||
{
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	bool r;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Fast pin a writable pfn only if it is a write fault request
 | 
			
		||||
	 * or the caller allows to map a writable pfn for a read fault
 | 
			
		||||
	 * request.
 | 
			
		||||
	 * Try the fast-only path when the caller wants to pin/get the page for
 | 
			
		||||
	 * writing.  If the caller only wants to read the page, KVM must go
 | 
			
		||||
	 * down the full, slow path in order to avoid racing an operation that
 | 
			
		||||
	 * breaks Copy-on-Write (CoW), e.g. so that KVM doesn't end up pointing
 | 
			
		||||
	 * at the old, read-only page while mm/ points at a new, writable page.
 | 
			
		||||
	 */
 | 
			
		||||
	if (!((kfp->flags & FOLL_WRITE) || kfp->map_writable))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page)) {
 | 
			
		||||
	if (kfp->pin)
 | 
			
		||||
		r = pin_user_pages_fast(kfp->hva, 1, FOLL_WRITE, &page) == 1;
 | 
			
		||||
	else
 | 
			
		||||
		r = get_user_page_fast_only(kfp->hva, FOLL_WRITE, &page);
 | 
			
		||||
 | 
			
		||||
	if (r) {
 | 
			
		||||
		*pfn = kvm_resolve_pfn(kfp, page, NULL, true);
 | 
			
		||||
		return true;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -2872,10 +2883,21 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
 | 
			
		|||
	struct page *page, *wpage;
 | 
			
		||||
	int npages;
 | 
			
		||||
 | 
			
		||||
	npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags);
 | 
			
		||||
	if (kfp->pin)
 | 
			
		||||
		npages = pin_user_pages_unlocked(kfp->hva, 1, &page, flags);
 | 
			
		||||
	else
 | 
			
		||||
		npages = get_user_pages_unlocked(kfp->hva, 1, &page, flags);
 | 
			
		||||
	if (npages != 1)
 | 
			
		||||
		return npages;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Pinning is mutually exclusive with opportunistically mapping a read
 | 
			
		||||
	 * fault as writable, as KVM should never pin pages when mapping memory
 | 
			
		||||
	 * into the guest (pinning is only for direct accesses from KVM).
 | 
			
		||||
	 */
 | 
			
		||||
	if (WARN_ON_ONCE(kfp->map_writable && kfp->pin))
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	/* map read fault as writable if possible */
 | 
			
		||||
	if (!(flags & FOLL_WRITE) && kfp->map_writable &&
 | 
			
		||||
	    get_user_page_fast_only(kfp->hva, FOLL_WRITE, &wpage)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -2884,6 +2906,7 @@ static int hva_to_pfn_slow(struct kvm_follow_pfn *kfp, kvm_pfn_t *pfn)
 | 
			
		|||
		flags |= FOLL_WRITE;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	*pfn = kvm_resolve_pfn(kfp, page, NULL, flags & FOLL_WRITE);
 | 
			
		||||
	return npages;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -3093,10 +3116,11 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 | 
			
		|||
		.slot = gfn_to_memslot(vcpu->kvm, gfn),
 | 
			
		||||
		.gfn = gfn,
 | 
			
		||||
		.flags = FOLL_WRITE,
 | 
			
		||||
		.refcounted_page = &map->refcounted_page,
 | 
			
		||||
		.refcounted_page = &map->pinned_page,
 | 
			
		||||
		.pin = true,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	map->refcounted_page = NULL;
 | 
			
		||||
	map->pinned_page = NULL;
 | 
			
		||||
	map->page = NULL;
 | 
			
		||||
	map->hva = NULL;
 | 
			
		||||
	map->gfn = gfn;
 | 
			
		||||
| 
						 | 
				
			
			@ -3133,16 +3157,16 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 | 
			
		|||
	if (dirty)
 | 
			
		||||
		kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
 | 
			
		||||
 | 
			
		||||
	if (map->refcounted_page) {
 | 
			
		||||
	if (map->pinned_page) {
 | 
			
		||||
		if (dirty)
 | 
			
		||||
			kvm_release_page_dirty(map->refcounted_page);
 | 
			
		||||
		else
 | 
			
		||||
			kvm_release_page_clean(map->refcounted_page);
 | 
			
		||||
			kvm_set_page_dirty(map->pinned_page);
 | 
			
		||||
		kvm_set_page_accessed(map->pinned_page);
 | 
			
		||||
		unpin_user_page(map->pinned_page);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	map->hva = NULL;
 | 
			
		||||
	map->page = NULL;
 | 
			
		||||
	map->refcounted_page = NULL;
 | 
			
		||||
	map->pinned_page = NULL;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,13 @@ struct kvm_follow_pfn {
 | 
			
		|||
	/* FOLL_* flags modifying lookup behavior, e.g. FOLL_WRITE. */
 | 
			
		||||
	unsigned int flags;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Pin the page (effectively FOLL_PIN, which is an mm/ internal flag).
 | 
			
		||||
	 * The page *must* be pinned if KVM will write to the page via a kernel
 | 
			
		||||
	 * mapping, e.g. via kmap(), mremap(), etc.
 | 
			
		||||
	 */
 | 
			
		||||
	bool pin;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If non-NULL, try to get a writable mapping even for a read fault.
 | 
			
		||||
	 * Set to true if a writable mapping was obtained.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue