	Merge tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux
Pull MAP_DENYWRITE removal from David Hildenbrand:
 "Remove all in-tree usage of MAP_DENYWRITE from the kernel and remove
  VM_DENYWRITE.
  There are some (minor) user-visible changes:
   - We no longer deny write access to shared libraries loaded via legacy
     uselib(); this behavior matches modern user space, e.g. dlopen().
   - We no longer deny write access to the elf interpreter after exec
     completed, treating it just like shared libraries (which it often
     is).
   - We always deny write access to the file linked via /proc/pid/exe:
     sys_prctl(PR_SET_MM_MAP/EXE_FILE) will fail if write access to the
     file cannot be denied, and write access to the file will remain
     denied until the link is effectively gone (exec, termination,
     sys_prctl(PR_SET_MM_MAP/EXE_FILE)) -- just as if exec'ing the file.
  Cross-compiled for a bunch of architectures (alpha, microblaze, i386,
  s390x, ...) and verified via ltp that especially the relevant tests
  (i.e., creat07 and execve04) continue working as expected"
* tag 'denywrite-for-5.15' of git://github.com/davidhildenbrand/linux:
  fs: update documentation of get_write_access() and friends
  mm: ignore MAP_DENYWRITE in ksys_mmap_pgoff()
  mm: remove VM_DENYWRITE
  binfmt: remove in-tree usage of MAP_DENYWRITE
  kernel/fork: always deny write access to current MM exe_file
  kernel/fork: factor out replacing the current MM exe_file
  binfmt: don't use MAP_DENYWRITE when loading shared libraries via uselib()
			
			
This commit is contained in: commit 49624efa65

16 changed files with 119 additions and 103 deletions
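As a plain illustration of the user-visible rule described in the pull message above (this sketch is not part of the series): the binary a process is currently executing remains write-denied, so opening it for writing fails with ETXTBSY. The sketch assumes a Linux host and finds the running binary via /proc/self/exe.

/* Minimal userspace sketch (illustration, not part of the series):
 * the binary a process is executing stays write-denied, so opening it
 * O_WRONLY fails with ETXTBSY. Assumes a Linux host. */
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char path[PATH_MAX];
	ssize_t n = readlink("/proc/self/exe", path, sizeof(path) - 1);
	int fd;

	if (n < 0) {
		perror("readlink");
		return 1;
	}
	path[n] = '\0';

	fd = open(path, O_WRONLY);
	if (fd < 0 && errno == ETXTBSY)
		printf("open(%s, O_WRONLY): ETXTBSY, as expected\n", path);
	else if (fd < 0)
		printf("open(%s, O_WRONLY) failed: %s\n", path, strerror(errno));
	else
		printf("open(%s, O_WRONLY) unexpectedly succeeded\n", path);
	if (fd >= 0)
		close(fd);
	return 0;
}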
				
			

arch/x86/ia32/ia32_aout.c

@@ -202,8 +202,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 				PROT_READ | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_32BIT,
+				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 				fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -211,8 +210,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_32BIT,
+				MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
@@ -293,7 +291,7 @@ static int load_aout_library(struct file *file)
 	/* Now use mmap to map the library into memory. */
 	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
+			MAP_FIXED | MAP_PRIVATE | MAP_32BIT,
 			N_TXTOFF(ex));
 	retval = error;
 	if (error != start_addr)

fs/binfmt_aout.c

@@ -221,8 +221,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		}
 
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
-			PROT_READ | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
 			fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -230,7 +229,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+				MAP_FIXED | MAP_PRIVATE,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
@@ -309,7 +308,7 @@ static int load_aout_library(struct file *file)
 	/* Now use mmap to map the library into memory. */
 	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED | MAP_PRIVATE,
 			N_TXTOFF(ex));
 	retval = error;
 	if (error != start_addr)

fs/binfmt_elf.c

@@ -622,7 +622,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 	eppnt = interp_elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
-			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+			int elf_type = MAP_PRIVATE;
 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
 						 true, true);
 			unsigned long vaddr = 0;
@@ -1070,7 +1070,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
 				     !!interpreter, false);
 
-		elf_flags = MAP_PRIVATE | MAP_DENYWRITE;
+		elf_flags = MAP_PRIVATE;
 
 		vaddr = elf_ppnt->p_vaddr;
 		/*
@@ -1384,7 +1384,7 @@ static int load_elf_library(struct file *file)
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))

fs/binfmt_elf_fdpic.c

@@ -1041,7 +1041,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_flags & PF_W) prot |= PROT_WRITE;
 		if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
 
-		flags = MAP_PRIVATE | MAP_DENYWRITE;
+		flags = MAP_PRIVATE;
 		maddr = 0;
 
 		switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {

fs/exec.c

@@ -1272,7 +1272,9 @@ int begin_new_exec(struct linux_binprm * bprm)
 	 * not visibile until then. This also enables the update
 	 * to be lockless.
 	 */
-	set_mm_exe_file(bprm->mm, bprm->file);
+	retval = set_mm_exe_file(bprm->mm, bprm->file);
+	if (retval)
+		goto out;
 
 	/* If the binary is not readable then enforce mm->dumpable=0 */
 	would_dump(bprm, bprm->file);

fs/proc/task_mmu.c

@@ -619,7 +619,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_MAYSHARE)]	= "ms",
 		[ilog2(VM_GROWSDOWN)]	= "gd",
 		[ilog2(VM_PFNMAP)]	= "pf",
-		[ilog2(VM_DENYWRITE)]	= "dw",
 		[ilog2(VM_LOCKED)]	= "lo",
 		[ilog2(VM_IO)]		= "io",
 		[ilog2(VM_SEQ_READ)]	= "sr",

include/linux/fs.h

@@ -3023,15 +3023,20 @@ static inline void file_end_write(struct file *file)
 }
 
 /*
+ * This is used for regular files where some users -- especially the
+ * currently executed binary in a process, previously handled via
+ * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
+ * read-write shared) accesses.
+ *
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
- * This is used for regular files.
- * We cannot support write (and maybe mmap read-write shared) accesses and
- * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
- * can have the following values:
- * 0: no writers, no VM_DENYWRITE mappings
- * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
- * > 0: (i_writecount) users are writing to the file.
+ * deny_write_access() denies write access to a file.
+ * allow_write_access() re-enables write access to a file.
+ *
+ * The i_writecount field of an inode can have the following values:
+ * 0: no write access, no denied write access
+ * < 0: (-i_writecount) users that denied write access to the file.
+ * > 0: (i_writecount) users that have write access to the file.
  *
  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
  * except for the cases where we don't hold i_writecount yet. Then we need to
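
The rewritten comment above documents the i_writecount protocol shared by ordinary writers (get_write_access()/put_write_access()) and write deniers (deny_write_access()/allow_write_access()). The following standalone sketch models those documented rules with C11 atomics; it is an illustration of the counter semantics, not the kernel's implementation.

/* Standalone model of the i_writecount rules documented above (C11
 * atomics; an illustration, not the kernel's implementation).
 *   0  -> no writers, no deniers
 *   >0 -> that many writers, deny_write_access() must fail
 *   <0 -> that many deniers, get_write_access() must fail   */
#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int i_writecount;

static int get_write_access(void)
{
	int v = atomic_load(&i_writecount);

	while (v >= 0)
		if (atomic_compare_exchange_weak(&i_writecount, &v, v + 1))
			return 0;
	return -ETXTBSY;	/* somebody denied write access */
}

static int deny_write_access(void)
{
	int v = atomic_load(&i_writecount);

	while (v <= 0)
		if (atomic_compare_exchange_weak(&i_writecount, &v, v - 1))
			return 0;
	return -ETXTBSY;	/* file already open for writing */
}

static void put_write_access(void)   { atomic_fetch_sub(&i_writecount, 1); }
static void allow_write_access(void) { atomic_fetch_add(&i_writecount, 1); }

int main(void)
{
	printf("deny  -> %d\n", deny_write_access());	/* 0 */
	printf("write -> %d\n", get_write_access());	/* -ETXTBSY */
	allow_write_access();
	printf("write -> %d\n", get_write_access());	/* 0 */
	put_write_access();
	return 0;
}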

include/linux/mm.h

@@ -281,7 +281,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_GROWSDOWN	0x00000100	/* general info on the segment */
 #define VM_UFFD_MISSING	0x00000200	/* missing pages tracking */
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
-#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 #define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */
 
 #define VM_LOCKED	0x00002000
@@ -2573,7 +2572,8 @@ static inline int check_data_rlimit(unsigned long rlim,
 extern int mm_take_all_locks(struct mm_struct *mm);
 extern void mm_drop_all_locks(struct mm_struct *mm);
 
-extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
+extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern struct file *get_task_exe_file(struct task_struct *task);
 

include/linux/mman.h

@@ -32,7 +32,8 @@
  * The historical set of flags that all mmap implementations implicitly
  * support when a ->mmap_validate() op is not provided in file_operations.
  *
- * MAP_EXECUTABLE is completely ignored throughout the kernel.
+ * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the
+ * kernel.
  */
 #define LEGACY_MAP_MASK (MAP_SHARED \
 		| MAP_PRIVATE \
@@ -153,7 +154,6 @@ static inline unsigned long
 calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
-	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
 	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      ) |
 	       arch_calc_vm_flag_bits(flags);

include/trace/events/mmflags.h

@@ -165,7 +165,6 @@ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
 	{VM_UFFD_MISSING,		"uffd_missing"	},		\
 IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR,	"uffd_minor"	)		\
 	{VM_PFNMAP,			"pfnmap"	},		\
-	{VM_DENYWRITE,			"denywrite"	},		\
 	{VM_UFFD_WP,			"uffd_wp"	},		\
 	{VM_LOCKED,			"locked"	},		\
 	{VM_IO,				"io"		},		\

kernel/events/core.c

@@ -8320,8 +8320,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	else
 		flags = MAP_PRIVATE;
 
-	if (vma->vm_flags & VM_DENYWRITE)
-		flags |= MAP_DENYWRITE;
 	if (vma->vm_flags & VM_LOCKED)
 		flags |= MAP_LOCKED;
 	if (is_vm_hugetlb_page(vma))

kernel/fork.c

@@ -471,6 +471,20 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	struct file *exe_file;
+
+	exe_file = get_mm_exe_file(oldmm);
+	RCU_INIT_POINTER(mm->exe_file, exe_file);
+	/*
+	 * We depend on the oldmm having properly denied write access to the
+	 * exe_file already.
+	 */
+	if (exe_file && deny_write_access(exe_file))
+		pr_warn_once("deny_write_access() failed in %s\n", __func__);
+}
+
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
 					struct mm_struct *oldmm)
@@ -494,7 +508,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
 
 	/* No ordering required: file already has been exposed. */
-	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+	dup_mm_exe_file(mm, oldmm);
 
 	mm->total_vm = oldmm->total_vm;
 	mm->data_vm = oldmm->data_vm;
@@ -557,12 +571,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
 		file = tmp->vm_file;
 		if (file) {
-			struct inode *inode = file_inode(file);
 			struct address_space *mapping = file->f_mapping;
 
 			get_file(file);
-			if (tmp->vm_flags & VM_DENYWRITE)
-				put_write_access(inode);
 			i_mmap_lock_write(mapping);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping_allow_writable(mapping);
@@ -640,7 +651,7 @@ static inline void mm_free_pgd(struct mm_struct *mm)
 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	mmap_write_lock(oldmm);
-	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+	dup_mm_exe_file(mm, oldmm);
 	mmap_write_unlock(oldmm);
 	return 0;
 }
@@ -1150,11 +1161,11 @@ void mmput_async(struct mm_struct *mm)
  *
  * Main users are mmput() and sys_execve(). Callers prevent concurrent
  * invocations: in mmput() nobody alive left, in execve task is single
- * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
- * mm->exe_file, but does so without using set_mm_exe_file() in order
- * to avoid the need for any locks.
+ * threaded.
+ *
+ * Can only fail if new_exe_file != NULL.
  */
-void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
 	struct file *old_exe_file;
 
@@ -1165,12 +1176,74 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 	 */
 	old_exe_file = rcu_dereference_raw(mm->exe_file);
 
-	if (new_exe_file)
+	if (new_exe_file) {
+		/*
+		 * We expect the caller (i.e., sys_execve) to already denied
+		 * write access, so this is unlikely to fail.
+		 */
+		if (unlikely(deny_write_access(new_exe_file)))
+			return -EACCES;
 		get_file(new_exe_file);
+	}
 	rcu_assign_pointer(mm->exe_file, new_exe_file);
-	if (old_exe_file)
+	if (old_exe_file) {
+		allow_write_access(old_exe_file);
 		fput(old_exe_file);
+	}
+	return 0;
+}
+
+/**
+ * replace_mm_exe_file - replace a reference to the mm's executable file
+ *
+ * This changes mm's executable file (shown as symlink /proc/[pid]/exe),
+ * dealing with concurrent invocation and without grabbing the mmap lock in
+ * write mode.
+ *
+ * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ */
+int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+	struct vm_area_struct *vma;
+	struct file *old_exe_file;
+	int ret = 0;
+
+	/* Forbid mm->exe_file change if old file still mapped. */
+	old_exe_file = get_mm_exe_file(mm);
+	if (old_exe_file) {
+		mmap_read_lock(mm);
+		for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
+			if (!vma->vm_file)
+				continue;
+			if (path_equal(&vma->vm_file->f_path,
+				       &old_exe_file->f_path))
+				ret = -EBUSY;
+		}
+		mmap_read_unlock(mm);
+		fput(old_exe_file);
+		if (ret)
+			return ret;
+	}
+
+	/* set the new file, lockless */
+	ret = deny_write_access(new_exe_file);
+	if (ret)
+		return -EACCES;
+	get_file(new_exe_file);
+
+	old_exe_file = xchg(&mm->exe_file, new_exe_file);
+	if (old_exe_file) {
+		/*
+		 * Don't race with dup_mmap() getting the file and disallowing
+		 * write access while someone might open the file writable.
+		 */
+		mmap_read_lock(mm);
+		allow_write_access(old_exe_file);
+		fput(old_exe_file);
+		mmap_read_unlock(mm);
+	}
+	return 0;
 }
 
 /**
  * get_mm_exe_file - acquire a reference to the mm's executable file
							
								
								
									

kernel/sys.c

@@ -1847,7 +1847,6 @@ SYSCALL_DEFINE1(umask, int, mask)
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
 	struct fd exe;
-	struct file *old_exe, *exe_file;
 	struct inode *inode;
 	int err;
 
@@ -1870,40 +1869,10 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
-	/*
-	 * Forbid mm->exe_file change if old file still mapped.
-	 */
-	exe_file = get_mm_exe_file(mm);
-	err = -EBUSY;
-	if (exe_file) {
-		struct vm_area_struct *vma;
-
-		mmap_read_lock(mm);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma->vm_file)
-				continue;
-			if (path_equal(&vma->vm_file->f_path,
-				       &exe_file->f_path))
-				goto exit_err;
-		}
-
-		mmap_read_unlock(mm);
-		fput(exe_file);
-	}
-
-	err = 0;
-	/* set the new file, lockless */
-	get_file(exe.file);
-	old_exe = xchg(&mm->exe_file, exe.file);
-	if (old_exe)
-		fput(old_exe);
+	err = replace_mm_exe_file(mm, exe.file);
 exit:
 	fdput(exe);
 	return err;
-exit_err:
-	mmap_read_unlock(mm);
-	fput(exe_file);
-	goto exit;
 }
 
 /*
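
With prctl_set_mm_exe_file() now delegating to replace_mm_exe_file(), the behavior called out in the pull message can be exercised from user space roughly as below. This is a hypothetical sketch, not part of the patch: /usr/bin/true is only a placeholder target, and the caller needs whatever privilege prctl(PR_SET_MM) requires on the running kernel (CAP_SYS_RESOURCE or the checkpoint/restore capability, depending on version).

/* Hypothetical sketch: replace this process's /proc/self/exe link.
 * Since this series, the kernel also takes a write-access denial on the
 * new exe file: EACCES means the file is open for writing somewhere,
 * EBUSY means the old exe file is still mapped. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/usr/bin/true"; /* placeholder */
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, fd, 0, 0))
		fprintf(stderr, "PR_SET_MM_EXE_FILE: %s\n", strerror(errno));
	else
		puts("exe link replaced");
	close(fd);
	return 0;
}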

lib/test_printf.c

@@ -675,9 +675,8 @@ flags(void)
 			"uptodate|dirty|lru|active|swapbacked",
 			cmp_buffer);
 
-	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC
-			| VM_DENYWRITE;
-	test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags);
+	flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	test("read|exec|mayread|maywrite|mayexec", "%pGv", &flags);
 
 	gfp = GFP_TRANSHUGE;
 	test("GFP_TRANSHUGE", "%pGg", &gfp);
							
								
								
									

mm/mmap.c

@@ -148,8 +148,6 @@ void vma_set_page_prot(struct vm_area_struct *vma)
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
 {
-	if (vma->vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
 	if (vma->vm_flags & VM_SHARED)
 		mapping_unmap_writable(mapping);
 
@@ -667,8 +665,6 @@ static void __vma_link_file(struct vm_area_struct *vma)
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
 
-		if (vma->vm_flags & VM_DENYWRITE)
-			put_write_access(file_inode(file));
 		if (vma->vm_flags & VM_SHARED)
 			mapping_allow_writable(mapping);
 
@@ -1625,8 +1621,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			return PTR_ERR(file);
 	}
 
-	flags &= ~MAP_DENYWRITE;
-
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 out_fput:
 	if (file)
@@ -1783,22 +1777,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	vma->vm_pgoff = pgoff;
 
 	if (file) {
-		if (vm_flags & VM_DENYWRITE) {
-			error = deny_write_access(file);
-			if (error)
-				goto free_vma;
-		}
 		if (vm_flags & VM_SHARED) {
 			error = mapping_map_writable(file->f_mapping);
 			if (error)
-				goto allow_write_and_free_vma;
+				goto free_vma;
 		}
 
-		/* ->mmap() can change vma->vm_file, but must guarantee that
-		 * vma_link() below can deny write-access if VM_DENYWRITE is set
-		 * and map writably if VM_SHARED is set. This usually means the
-		 * new file must not have been exposed to user-space, yet.
-		 */
 		vma->vm_file = get_file(file);
 		error = call_mmap(file, vma);
 		if (error)
@@ -1855,13 +1839,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
-	if (file) {
 unmap_writable:
-		if (vm_flags & VM_SHARED)
-			mapping_unmap_writable(file->f_mapping);
-		if (vm_flags & VM_DENYWRITE)
-			allow_write_access(file);
-	}
+	if (file && vm_flags & VM_SHARED)
+		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
 out:
 	perf_event_mmap(vma);
@@ -1901,9 +1881,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	charged = 0;
 	if (vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
-allow_write_and_free_vma:
-	if (vm_flags & VM_DENYWRITE)
-		allow_write_access(file);
 free_vma:
 	vm_area_free(vma);
 unacct_error:

mm/nommu.c

@@ -1293,8 +1293,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out;
 	}
 
-	flags &= ~MAP_DENYWRITE;
-
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 
 	if (file)