Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull mm updates from Andrew Morton:

 - Yosry Ahmed brought back some cgroup v1 stats in OOM logs

 - Yosry has also eliminated cgroup's atomic rstat flushing

 - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability

 - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning

 - Lorenzo Stoakes has done some maintenance work on the get_user_pages() interface

 - Liam Howlett continues with cleanups and maintenance work to the maple tree code. Peng Zhang also does some work on maple tree

 - Johannes Weiner has done some cleanup work on the compaction code

 - David Hildenbrand has contributed additional selftests for get_user_pages()

 - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code

 - Baolin Wang has provided some compaction cleanups

 - SeongJae Park continues maintenance work on the DAMON code

 - Huang Ying has done some maintenance on the swap code's usage of device refcounting

 - Christoph Hellwig has some cleanups for the filemap/directio code

 - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses

 - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings

 - John Hubbard has a series of fixes to the MM selftesting code

 - ZhangPeng continues the folio conversion campaign

 - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock

 - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8

 - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management

 - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code

 - Vishal Moola also has done some folio conversion work

 - Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch

* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
  mm/hugetlb: remove hugetlb_set_page_subpool()
  mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
  hugetlb: revert use of page_cache_next_miss()
  Revert "page cache: fix page_cache_next/prev_miss off by one"
  mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
  mm: memcg: rename and document global_reclaim()
  mm: kill [add|del]_page_to_lru_list()
  mm: compaction: convert to use a folio in isolate_migratepages_block()
  mm: zswap: fix double invalidate with exclusive loads
  mm: remove unnecessary pagevec includes
  mm: remove references to pagevec
  mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
  mm: remove struct pagevec
  net: convert sunrpc from pagevec to folio_batch
  i915: convert i915_gpu_error to use a folio_batch
  pagevec: rename fbatch_count()
  mm: remove check_move_unevictable_pages()
  drm: convert drm_gem_put_pages() to use a folio_batch
  i915: convert shmem_sg_free_table() to use a folio_batch
  scatterlist: add sg_set_folio()
  ...
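As a rough illustration of the cachestat() item in the changelog above (this sketch is not part of the repository page or of the kernel source shown below; it assumes the uapi as merged in this cycle, namely struct cachestat_range and struct cachestat from <linux/mman.h> and a __NR_cachestat number supplied by sufficiently new kernel headers, with no libc wrapper yet, hence the raw syscall(2) call), a userspace program might query pagecache residency for an open file like this:

/*
 * Hypothetical userspace sketch: probe pagecache state of a file with the
 * new cachestat() syscall. Build against kernel headers new enough to
 * provide struct cachestat (in <linux/mman.h>) and __NR_cachestat.
 */
#include <fcntl.h>
#include <linux/mman.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* len == 0 is assumed to mean "from off to the end of the file" */
	struct cachestat_range range = { .off = 0, .len = 0 };
	struct cachestat cs;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* flags is reserved; pass 0 */
	if (syscall(__NR_cachestat, fd, &range, &cs, 0) != 0) {
		perror("cachestat");
		close(fd);
		return 1;
	}

	printf("cached=%llu dirty=%llu writeback=%llu evicted=%llu recently_evicted=%llu\n",
	       (unsigned long long)cs.nr_cache,
	       (unsigned long long)cs.nr_dirty,
	       (unsigned long long)cs.nr_writeback,
	       (unsigned long long)cs.nr_evicted,
	       (unsigned long long)cs.nr_recently_evicted);

	close(fd);
	return 0;
}

Unlike mincore(), which reports only per-page residency for a mapped range, this interface returns aggregate counts of cached, dirty, writeback, evicted, and recently evicted pages for a file range, which is the "more powerful and with improved usability" point the changelog makes.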
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright (c) 2021, Google LLC.
 * Pasha Tatashin <pasha.tatashin@soleen.com>
 */
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/page_table_check.h>

#undef pr_fmt
#define pr_fmt(fmt)	"page_table_check: " fmt

struct page_table_check {
	atomic_t anon_map_count;
	atomic_t file_map_count;
};

static bool __page_table_check_enabled __initdata =
				IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);

DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
EXPORT_SYMBOL(page_table_check_disabled);

static int __init early_page_table_check_param(char *buf)
{
	return kstrtobool(buf, &__page_table_check_enabled);
}

early_param("page_table_check", early_page_table_check_param);

static bool __init need_page_table_check(void)
{
	return __page_table_check_enabled;
}

static void __init init_page_table_check(void)
{
	if (!__page_table_check_enabled)
		return;
	static_branch_disable(&page_table_check_disabled);
}

struct page_ext_operations page_table_check_ops = {
	.size = sizeof(struct page_table_check),
	.need = need_page_table_check,
	.init = init_page_table_check,
	.need_shared_flags = false,
};

static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
{
	BUG_ON(!page_ext);
	return (void *)(page_ext) + page_table_check_ops.offset;
}

/*
 * An entry is removed from the page table, decrement the counters for that page
 * verify that it is of correct type and counters do not become negative.
 */
static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
				   unsigned long pfn, unsigned long pgcnt)
{
	struct page_ext *page_ext;
	struct page *page;
	unsigned long i;
	bool anon;

	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);
	page_ext = page_ext_get(page);

	BUG_ON(PageSlab(page));
	anon = PageAnon(page);

	for (i = 0; i < pgcnt; i++) {
		struct page_table_check *ptc = get_page_table_check(page_ext);

		if (anon) {
			BUG_ON(atomic_read(&ptc->file_map_count));
			BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
		} else {
			BUG_ON(atomic_read(&ptc->anon_map_count));
			BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
		}
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

/*
 * A new entry is added to the page table, increment the counters for that page
 * verify that it is of correct type and is not being mapped with a different
 * type to a different process.
 */
static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
				 unsigned long pfn, unsigned long pgcnt,
				 bool rw)
{
	struct page_ext *page_ext;
	struct page *page;
	unsigned long i;
	bool anon;

	if (!pfn_valid(pfn))
		return;

	page = pfn_to_page(pfn);
	page_ext = page_ext_get(page);

	BUG_ON(PageSlab(page));
	anon = PageAnon(page);

	for (i = 0; i < pgcnt; i++) {
		struct page_table_check *ptc = get_page_table_check(page_ext);

		if (anon) {
			BUG_ON(atomic_read(&ptc->file_map_count));
			BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
		} else {
			BUG_ON(atomic_read(&ptc->anon_map_count));
			BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0);
		}
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

/*
 * page is on free list, or is being allocated, verify that counters are zeroes
 * crash if they are not.
 */
void __page_table_check_zero(struct page *page, unsigned int order)
{
	struct page_ext *page_ext;
	unsigned long i;

	BUG_ON(PageSlab(page));

	page_ext = page_ext_get(page);
	BUG_ON(!page_ext);
	for (i = 0; i < (1ul << order); i++) {
		struct page_table_check *ptc = get_page_table_check(page_ext);

		BUG_ON(atomic_read(&ptc->anon_map_count));
		BUG_ON(atomic_read(&ptc->file_map_count));
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

void __page_table_check_pte_clear(struct mm_struct *mm, unsigned long addr,
				  pte_t pte)
{
	if (&init_mm == mm)
		return;

	if (pte_user_accessible_page(pte)) {
		page_table_check_clear(mm, addr, pte_pfn(pte),
				       PAGE_SIZE >> PAGE_SHIFT);
	}
}
EXPORT_SYMBOL(__page_table_check_pte_clear);

void __page_table_check_pmd_clear(struct mm_struct *mm, unsigned long addr,
				  pmd_t pmd)
{
	if (&init_mm == mm)
		return;

	if (pmd_user_accessible_page(pmd)) {
		page_table_check_clear(mm, addr, pmd_pfn(pmd),
				       PMD_SIZE >> PAGE_SHIFT);
	}
}
EXPORT_SYMBOL(__page_table_check_pmd_clear);

void __page_table_check_pud_clear(struct mm_struct *mm, unsigned long addr,
				  pud_t pud)
{
	if (&init_mm == mm)
		return;

	if (pud_user_accessible_page(pud)) {
		page_table_check_clear(mm, addr, pud_pfn(pud),
				       PUD_SIZE >> PAGE_SHIFT);
	}
}
EXPORT_SYMBOL(__page_table_check_pud_clear);

void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte)
{
	if (&init_mm == mm)
		return;

	__page_table_check_pte_clear(mm, addr, ptep_get(ptep));
	if (pte_user_accessible_page(pte)) {
		page_table_check_set(mm, addr, pte_pfn(pte),
				     PAGE_SIZE >> PAGE_SHIFT,
				     pte_write(pte));
	}
}
EXPORT_SYMBOL(__page_table_check_pte_set);

void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr,
				pmd_t *pmdp, pmd_t pmd)
{
	if (&init_mm == mm)
		return;

	__page_table_check_pmd_clear(mm, addr, *pmdp);
	if (pmd_user_accessible_page(pmd)) {
		page_table_check_set(mm, addr, pmd_pfn(pmd),
				     PMD_SIZE >> PAGE_SHIFT,
				     pmd_write(pmd));
	}
}
EXPORT_SYMBOL(__page_table_check_pmd_set);

void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
				pud_t *pudp, pud_t pud)
{
	if (&init_mm == mm)
		return;

	__page_table_check_pud_clear(mm, addr, *pudp);
	if (pud_user_accessible_page(pud)) {
		page_table_check_set(mm, addr, pud_pfn(pud),
				     PUD_SIZE >> PAGE_SHIFT,
				     pud_write(pud));
	}
}
EXPORT_SYMBOL(__page_table_check_pud_set);

void __page_table_check_pte_clear_range(struct mm_struct *mm,
					unsigned long addr,
					pmd_t pmd)
{
	if (&init_mm == mm)
		return;

	if (!pmd_bad(pmd) && !pmd_leaf(pmd)) {
		pte_t *ptep = pte_offset_map(&pmd, addr);
		unsigned long i;

		if (WARN_ON(!ptep))
			return;
		for (i = 0; i < PTRS_PER_PTE; i++) {
			__page_table_check_pte_clear(mm, addr, ptep_get(ptep));
			addr += PAGE_SIZE;
			ptep++;
		}
		pte_unmap(ptep - PTRS_PER_PTE);
	}
}