forked from mirrors/linux
- The 4-patch series "Fix uprobe pte be overwritten when expanding vma"
fixes a longstanding and quite obscure bug related to the vma merging of the uprobe mmap page. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaEN1LAAKCRDdBJ7gKXxA jsfLAQCC+C8397X6lNKPI3bHGLGEHubP2uzb6bOFMAU6fIRobQEAqUnoUhfP+xsu tDbcQEBZ+vfyeT9Zr9vA+uBDcA3OGw0= =9oaG -----END PGP SIGNATURE----- Merge tag 'mm-stable-2025-06-06-16-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull more MM updates from Andrew Morton: "The series 'Fix uprobe pte be overwritten when expanding vma' fixes a longstanding and quite obscure bug related to the vma merging of the uprobe mmap page" * tag 'mm-stable-2025-06-06-16-09' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: selftests/mm: add test about uprobe pte be orphan during vma merge selftests/mm: extract read_sysfs and write_sysfs into vm_util mm: expose abnormal new_pte during move_ptes mm: fix uprobe pte be overwritten when expanding vma mm/damon: s/primitives/code/ on comments
This commit is contained in:
commit
bdc7f8c5ad
16 changed files with 122 additions and 44 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
/*
|
/*
|
||||||
* Common Primitives for DAMON Modules
|
* Common Code for DAMON Modules
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
/*
|
/*
|
||||||
* Common Primitives for DAMON Modules
|
* Common Code for DAMON Modules
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
/*
|
/*
|
||||||
* Common Primitives for Data Access Monitoring
|
* Common Code for Data Access Monitoring
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
/*
|
/*
|
||||||
* Common Primitives for Data Access Monitoring
|
* Common Code for Data Access Monitoring
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
/*
|
/*
|
||||||
* DAMON Primitives for The Physical Address Space
|
* DAMON Code for The Physical Address Space
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
/*
|
/*
|
||||||
* Common Primitives for DAMON Sysfs Interface
|
* Common Code for DAMON Sysfs Interface
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
/*
|
/*
|
||||||
* Common Primitives for DAMON Sysfs Interface
|
* Common Code for DAMON Sysfs Interface
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
/*
|
/*
|
||||||
* DAMON Primitives for Virtual Address Spaces
|
* DAMON Code for Virtual Address Spaces
|
||||||
*
|
*
|
||||||
* Author: SeongJae Park <sj@kernel.org>
|
* Author: SeongJae Park <sj@kernel.org>
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -237,6 +237,8 @@ static int move_ptes(struct pagetable_move_control *pmc,
|
||||||
|
|
||||||
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
|
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
|
||||||
new_pte++, new_addr += PAGE_SIZE) {
|
new_pte++, new_addr += PAGE_SIZE) {
|
||||||
|
VM_WARN_ON_ONCE(!pte_none(*new_pte));
|
||||||
|
|
||||||
if (pte_none(ptep_get(old_pte)))
|
if (pte_none(ptep_get(old_pte)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
|
||||||
20
mm/vma.c
20
mm/vma.c
|
|
@ -169,6 +169,9 @@ static void init_multi_vma_prep(struct vma_prepare *vp,
|
||||||
vp->file = vma->vm_file;
|
vp->file = vma->vm_file;
|
||||||
if (vp->file)
|
if (vp->file)
|
||||||
vp->mapping = vma->vm_file->f_mapping;
|
vp->mapping = vma->vm_file->f_mapping;
|
||||||
|
|
||||||
|
if (vmg && vmg->skip_vma_uprobe)
|
||||||
|
vp->skip_vma_uprobe = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -358,10 +361,13 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
|
||||||
|
|
||||||
if (vp->file) {
|
if (vp->file) {
|
||||||
i_mmap_unlock_write(vp->mapping);
|
i_mmap_unlock_write(vp->mapping);
|
||||||
uprobe_mmap(vp->vma);
|
|
||||||
|
|
||||||
if (vp->adj_next)
|
if (!vp->skip_vma_uprobe) {
|
||||||
uprobe_mmap(vp->adj_next);
|
uprobe_mmap(vp->vma);
|
||||||
|
|
||||||
|
if (vp->adj_next)
|
||||||
|
uprobe_mmap(vp->adj_next);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vp->remove) {
|
if (vp->remove) {
|
||||||
|
|
@ -1830,6 +1836,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
||||||
faulted_in_anon_vma = false;
|
faulted_in_anon_vma = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the VMA we are copying might contain a uprobe PTE, ensure
|
||||||
|
* that we do not establish one upon merge. Otherwise, when mremap()
|
||||||
|
* moves page tables, it will orphan the newly created PTE.
|
||||||
|
*/
|
||||||
|
if (vma->vm_file)
|
||||||
|
vmg.skip_vma_uprobe = true;
|
||||||
|
|
||||||
new_vma = find_vma_prev(mm, addr, &vmg.prev);
|
new_vma = find_vma_prev(mm, addr, &vmg.prev);
|
||||||
if (new_vma && new_vma->vm_start < addr + len)
|
if (new_vma && new_vma->vm_start < addr + len)
|
||||||
return NULL; /* should never get here */
|
return NULL; /* should never get here */
|
||||||
|
|
|
||||||
7
mm/vma.h
7
mm/vma.h
|
|
@ -19,6 +19,8 @@ struct vma_prepare {
|
||||||
struct vm_area_struct *insert;
|
struct vm_area_struct *insert;
|
||||||
struct vm_area_struct *remove;
|
struct vm_area_struct *remove;
|
||||||
struct vm_area_struct *remove2;
|
struct vm_area_struct *remove2;
|
||||||
|
|
||||||
|
bool skip_vma_uprobe :1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct unlink_vma_file_batch {
|
struct unlink_vma_file_batch {
|
||||||
|
|
@ -120,6 +122,11 @@ struct vma_merge_struct {
|
||||||
*/
|
*/
|
||||||
bool give_up_on_oom :1;
|
bool give_up_on_oom :1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If set, skip uprobe_mmap upon merged vma.
|
||||||
|
*/
|
||||||
|
bool skip_vma_uprobe :1;
|
||||||
|
|
||||||
/* Internal flags set during merge process: */
|
/* Internal flags set during merge process: */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -58,40 +58,12 @@ int debug;
|
||||||
|
|
||||||
static int ksm_write_sysfs(const char *file_path, unsigned long val)
|
static int ksm_write_sysfs(const char *file_path, unsigned long val)
|
||||||
{
|
{
|
||||||
FILE *f = fopen(file_path, "w");
|
return write_sysfs(file_path, val);
|
||||||
|
|
||||||
if (!f) {
|
|
||||||
fprintf(stderr, "f %s\n", file_path);
|
|
||||||
perror("fopen");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (fprintf(f, "%lu", val) < 0) {
|
|
||||||
perror("fprintf");
|
|
||||||
fclose(f);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fclose(f);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ksm_read_sysfs(const char *file_path, unsigned long *val)
|
static int ksm_read_sysfs(const char *file_path, unsigned long *val)
|
||||||
{
|
{
|
||||||
FILE *f = fopen(file_path, "r");
|
return read_sysfs(file_path, val);
|
||||||
|
|
||||||
if (!f) {
|
|
||||||
fprintf(stderr, "f %s\n", file_path);
|
|
||||||
perror("fopen");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (fscanf(f, "%lu", val) != 1) {
|
|
||||||
perror("fscanf");
|
|
||||||
fclose(f);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fclose(f);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ksm_print_sysfs(void)
|
static void ksm_print_sysfs(void)
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,14 @@
|
||||||
|
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
#include "../kselftest_harness.h"
|
#include "../kselftest_harness.h"
|
||||||
|
#include <fcntl.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
|
#include <linux/perf_event.h>
|
||||||
#include "vm_util.h"
|
#include "vm_util.h"
|
||||||
|
|
||||||
FIXTURE(merge)
|
FIXTURE(merge)
|
||||||
|
|
@ -452,4 +455,44 @@ TEST_F(merge, forked_source_vma)
|
||||||
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
|
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(merge, handle_uprobe_upon_merged_vma)
|
||||||
|
{
|
||||||
|
const size_t attr_sz = sizeof(struct perf_event_attr);
|
||||||
|
unsigned int page_size = self->page_size;
|
||||||
|
const char *probe_file = "./foo";
|
||||||
|
char *carveout = self->carveout;
|
||||||
|
struct perf_event_attr attr;
|
||||||
|
unsigned long type;
|
||||||
|
void *ptr1, *ptr2;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
fd = open(probe_file, O_RDWR|O_CREAT, 0600);
|
||||||
|
ASSERT_GE(fd, 0);
|
||||||
|
|
||||||
|
ASSERT_EQ(ftruncate(fd, page_size), 0);
|
||||||
|
ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
|
||||||
|
|
||||||
|
memset(&attr, 0, attr_sz);
|
||||||
|
attr.size = attr_sz;
|
||||||
|
attr.type = type;
|
||||||
|
attr.config1 = (__u64)(long)probe_file;
|
||||||
|
attr.config2 = 0x0;
|
||||||
|
|
||||||
|
ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
|
||||||
|
|
||||||
|
ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
|
||||||
|
MAP_PRIVATE | MAP_FIXED, fd, 0);
|
||||||
|
ASSERT_NE(ptr1, MAP_FAILED);
|
||||||
|
|
||||||
|
ptr2 = mremap(ptr1, page_size, 2 * page_size,
|
||||||
|
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
|
||||||
|
ASSERT_NE(ptr2, MAP_FAILED);
|
||||||
|
|
||||||
|
ASSERT_NE(mremap(ptr2, page_size, page_size,
|
||||||
|
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
remove(probe_file);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_HARNESS_MAIN
|
TEST_HARNESS_MAIN
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ void show(unsigned long ps)
|
||||||
system(buf);
|
system(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long read_sysfs(int warn, char *fmt, ...)
|
unsigned long thuge_read_sysfs(int warn, char *fmt, ...)
|
||||||
{
|
{
|
||||||
char *line = NULL;
|
char *line = NULL;
|
||||||
size_t linelen = 0;
|
size_t linelen = 0;
|
||||||
|
|
@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
|
||||||
|
|
||||||
unsigned long read_free(unsigned long ps)
|
unsigned long read_free(unsigned long ps)
|
||||||
{
|
{
|
||||||
return read_sysfs(ps != getpagesize(),
|
return thuge_read_sysfs(ps != getpagesize(),
|
||||||
"/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
|
"/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
|
||||||
ps >> 10);
|
ps >> 10);
|
||||||
}
|
}
|
||||||
|
|
@ -195,7 +195,7 @@ void find_pagesizes(void)
|
||||||
}
|
}
|
||||||
globfree(&g);
|
globfree(&g);
|
||||||
|
|
||||||
if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
|
if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
|
||||||
ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
|
ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
|
||||||
largest * NUM_PAGES);
|
largest * NUM_PAGES);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -486,3 +486,41 @@ int close_procmap(struct procmap_fd *procmap)
|
||||||
{
|
{
|
||||||
return close(procmap->fd);
|
return close(procmap->fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * write_sysfs() - write an unsigned long, formatted as decimal, to a file.
 * @file_path: path of the file to open for writing (truncated on open).
 * @val: value to write, formatted with "%lu" and no trailing newline.
 *
 * Return: 0 on success, 1 if the file could not be opened, the value could
 * not be formatted into the stream, or the stream could not be flushed and
 * closed. A diagnostic is printed to stderr on every failure path.
 */
int write_sysfs(const char *file_path, unsigned long val)
{
	FILE *f = fopen(file_path, "w");

	if (!f) {
		fprintf(stderr, "f %s\n", file_path);
		perror("fopen");
		return 1;
	}

	if (fprintf(f, "%lu", val) < 0) {
		perror("fprintf");
		fclose(f);
		return 1;
	}

	/*
	 * stdio buffers the fprintf() above, so the data is normally handed
	 * to the underlying file only when the stream is flushed here. A
	 * rejected write therefore shows up as an fclose() failure and must
	 * not be reported as success.
	 */
	if (fclose(f) != 0) {
		perror("fclose");
		return 1;
	}

	return 0;
}
|
||||||
|
|
||||||
|
/*
 * read_sysfs() - read one unsigned long, in decimal, from a file.
 * @file_path: path of the file to open for reading.
 * @val: where the parsed value is stored on success.
 *
 * Parses the start of the file with "%lu"; anything after the number is
 * ignored.
 *
 * Return: 0 on success, 1 if the file could not be opened or does not begin
 * with a value that "%lu" can convert. A diagnostic is printed to stderr on
 * every failure path.
 */
int read_sysfs(const char *file_path, unsigned long *val)
{
	FILE *f = fopen(file_path, "r");

	if (!f) {
		fprintf(stderr, "f %s\n", file_path);
		perror("fopen");
		return 1;
	}

	if (fscanf(f, "%lu", val) != 1) {
		/*
		 * Only a stream error leaves a meaningful errno behind; a
		 * matching failure or an empty file does not, so perror()
		 * would print a stale or misleading message in that case.
		 */
		if (ferror(f))
			perror("fscanf");
		else
			fprintf(stderr, "fscanf: no unsigned integer in %s\n",
				file_path);
		fclose(f);
		return 1;
	}

	fclose(f);
	return 0;
}
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,8 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
|
||||||
int query_procmap(struct procmap_fd *procmap);
|
int query_procmap(struct procmap_fd *procmap);
|
||||||
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
|
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
|
||||||
int close_procmap(struct procmap_fd *procmap);
|
int close_procmap(struct procmap_fd *procmap);
|
||||||
|
int write_sysfs(const char *file_path, unsigned long val);
|
||||||
|
int read_sysfs(const char *file_path, unsigned long *val);
|
||||||
|
|
||||||
static inline int open_self_procmap(struct procmap_fd *procmap_out)
|
static inline int open_self_procmap(struct procmap_fd *procmap_out)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue