mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	Typical PDE creation code looks like:
	pde = create_proc_entry("foo", 0, NULL);
	if (pde)
		pde->proc_fops = &foo_proc_fops;
Notice that the PDE is created first, and only then is ->proc_fops set to
its final value. This is a problem because right after creation
a) the PDE is fully visible in /proc, and
b) ->proc_fops is proc_file_operations, which has no ->open callback. So it's
   possible to ->read without ->open (see one class of oopses below).
The fix is a new API, proc_create(), which makes sure ->proc_fops is
set up before the PDE is glued into the main tree. Typical new code looks like:
	pde = proc_create("foo", 0, NULL, &foo_proc_fops);
	if (!pde)
		return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go away.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
       00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
       00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c10818b8>] proc_reg_read+0x60/0x73
 [<c1081858>] proc_reg_read+0x0/0x73
 [<c105a34f>] vfs_read+0x6c/0x8b
 [<c105a6f3>] sys_read+0x3c/0x63
 [<c10025f2>] sysenter_past_esp+0x5f/0xa5
 [<c10697a7>] destroy_inode+0x24/0x33
 =======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			240 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			240 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 *  linux/fs/proc/root.c
 | 
						|
 *
 | 
						|
 *  Copyright (C) 1991, 1992 Linus Torvalds
 | 
						|
 *
 | 
						|
 *  proc root directory handling functions
 | 
						|
 */
 | 
						|
 | 
						|
#include <asm/uaccess.h>
 | 
						|
 | 
						|
#include <linux/errno.h>
 | 
						|
#include <linux/time.h>
 | 
						|
#include <linux/proc_fs.h>
 | 
						|
#include <linux/stat.h>
 | 
						|
#include <linux/init.h>
 | 
						|
#include <linux/sched.h>
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/bitops.h>
 | 
						|
#include <linux/smp_lock.h>
 | 
						|
#include <linux/mount.h>
 | 
						|
#include <linux/pid_namespace.h>
 | 
						|
 | 
						|
#include "internal.h"
 | 
						|
 | 
						|
/* Directory entries filled in by proc_root_init() via proc_mkdir(). */
struct proc_dir_entry *proc_bus;
struct proc_dir_entry *proc_root_fs;
struct proc_dir_entry *proc_root_driver;
 | 
						|
 | 
						|
static int proc_test_super(struct super_block *sb, void *data)
 | 
						|
{
 | 
						|
	return sb->s_fs_info == data;
 | 
						|
}
 | 
						|
 | 
						|
static int proc_set_super(struct super_block *sb, void *data)
 | 
						|
{
 | 
						|
	struct pid_namespace *ns;
 | 
						|
 | 
						|
	ns = (struct pid_namespace *)data;
 | 
						|
	sb->s_fs_info = get_pid_ns(ns);
 | 
						|
	return set_anon_super(sb, NULL);
 | 
						|
}
 | 
						|
 | 
						|
static int proc_get_sb(struct file_system_type *fs_type,
 | 
						|
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 | 
						|
{
 | 
						|
	int err;
 | 
						|
	struct super_block *sb;
 | 
						|
	struct pid_namespace *ns;
 | 
						|
	struct proc_inode *ei;
 | 
						|
 | 
						|
	if (proc_mnt) {
 | 
						|
		/* Seed the root directory with a pid so it doesn't need
 | 
						|
		 * to be special in base.c.  I would do this earlier but
 | 
						|
		 * the only task alive when /proc is mounted the first time
 | 
						|
		 * is the init_task and it doesn't have any pids.
 | 
						|
		 */
 | 
						|
		ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
 | 
						|
		if (!ei->pid)
 | 
						|
			ei->pid = find_get_pid(1);
 | 
						|
	}
 | 
						|
 | 
						|
	if (flags & MS_KERNMOUNT)
 | 
						|
		ns = (struct pid_namespace *)data;
 | 
						|
	else
 | 
						|
		ns = current->nsproxy->pid_ns;
 | 
						|
 | 
						|
	sb = sget(fs_type, proc_test_super, proc_set_super, ns);
 | 
						|
	if (IS_ERR(sb))
 | 
						|
		return PTR_ERR(sb);
 | 
						|
 | 
						|
	if (!sb->s_root) {
 | 
						|
		sb->s_flags = flags;
 | 
						|
		err = proc_fill_super(sb);
 | 
						|
		if (err) {
 | 
						|
			up_write(&sb->s_umount);
 | 
						|
			deactivate_super(sb);
 | 
						|
			return err;
 | 
						|
		}
 | 
						|
 | 
						|
		ei = PROC_I(sb->s_root->d_inode);
 | 
						|
		if (!ei->pid) {
 | 
						|
			rcu_read_lock();
 | 
						|
			ei->pid = get_pid(find_pid_ns(1, ns));
 | 
						|
			rcu_read_unlock();
 | 
						|
		}
 | 
						|
 | 
						|
		sb->s_flags |= MS_ACTIVE;
 | 
						|
		ns->proc_mnt = mnt;
 | 
						|
	}
 | 
						|
 | 
						|
	return simple_set_mnt(mnt, sb);
 | 
						|
}
 | 
						|
 | 
						|
static void proc_kill_sb(struct super_block *sb)
 | 
						|
{
 | 
						|
	struct pid_namespace *ns;
 | 
						|
 | 
						|
	ns = (struct pid_namespace *)sb->s_fs_info;
 | 
						|
	kill_anon_super(sb);
 | 
						|
	put_pid_ns(ns);
 | 
						|
}
 | 
						|
 | 
						|
static struct file_system_type proc_fs_type = {
 | 
						|
	.name		= "proc",
 | 
						|
	.get_sb		= proc_get_sb,
 | 
						|
	.kill_sb	= proc_kill_sb,
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Boot-time setup of /proc: prepare the inode cache, register the
 * filesystem, create the kernel-internal mount for init_pid_ns and then
 * populate the fixed top-level entries.
 *
 * Any failure makes the function return early; nothing is reported to
 * the caller.
 */
void __init proc_root_init(void)
{
	if (proc_init_inodecache())
		return;

	if (register_filesystem(&proc_fs_type))
		return;

	proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
	if (IS_ERR(proc_mnt)) {
		unregister_filesystem(&proc_fs_type);
		return;
	}

	proc_misc_init();
	proc_net_init();

#ifdef CONFIG_SYSVIPC
	proc_mkdir("sysvipc", NULL);
#endif
	proc_root_fs = proc_mkdir("fs", NULL);
	proc_root_driver = proc_mkdir("driver", NULL);
	/* somewhere for the nfsd filesystem to be mounted */
	proc_mkdir("fs/nfsd", NULL);
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
	/* just give it a mountpoint */
	proc_mkdir("openprom", NULL);
#endif
	proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
	proc_device_tree_init();
#endif
	proc_bus = proc_mkdir("bus", NULL);
	proc_sys_init();
}
 | 
						|
 | 
						|
static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
 | 
						|
)
 | 
						|
{
 | 
						|
	generic_fillattr(dentry->d_inode, stat);
 | 
						|
	stat->nlink = proc_root.nlink + nr_processes();
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
 | 
						|
{
 | 
						|
	if (!proc_lookup(dir, dentry, nd)) {
 | 
						|
		return NULL;
 | 
						|
	}
 | 
						|
	
 | 
						|
	return proc_pid_lookup(dir, dentry, nd);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * ->readdir for the /proc root.  Positions below FIRST_PROCESS_ENTRY are
 * served by proc_readdir() under the big kernel lock; once that returns
 * a positive value the position is moved to FIRST_PROCESS_ENTRY and the
 * listing continues with proc_pid_readdir().
 *
 * Returns proc_readdir()'s result when it is <= 0, otherwise whatever
 * proc_pid_readdir() returns.
 */
static int proc_root_readdir(struct file * filp,
	void * dirent, filldir_t filldir)
{
	unsigned int pos = filp->f_pos;
	int rc = 1;	/* positive: carry on with the pid entries */

	lock_kernel();
	if (pos < FIRST_PROCESS_ENTRY) {
		rc = proc_readdir(filp, dirent, filldir);
		if (rc > 0)
			filp->f_pos = FIRST_PROCESS_ENTRY;
	}
	unlock_kernel();

	if (rc <= 0)
		return rc;

	return proc_pid_readdir(filp, dirent, filldir);
}
 | 
						|
 | 
						|
/*
 | 
						|
 * The root /proc directory is special, as it has the
 | 
						|
 * <pid> directories. Thus we don't use the generic
 | 
						|
 * directory handling functions for that..
 | 
						|
 */
 | 
						|
static const struct file_operations proc_root_operations = {
 | 
						|
	.read		 = generic_read_dir,
 | 
						|
	.readdir	 = proc_root_readdir,
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 * proc root can do almost nothing..
 | 
						|
 */
 | 
						|
static const struct inode_operations proc_root_inode_operations = {
 | 
						|
	.lookup		= proc_root_lookup,
 | 
						|
	.getattr	= proc_root_getattr,
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 * This is the root "inode" in the /proc tree..
 | 
						|
 */
 | 
						|
struct proc_dir_entry proc_root = {
 | 
						|
	.low_ino	= PROC_ROOT_INO, 
 | 
						|
	.namelen	= 5, 
 | 
						|
	.name		= "/proc",
 | 
						|
	.mode		= S_IFDIR | S_IRUGO | S_IXUGO, 
 | 
						|
	.nlink		= 2, 
 | 
						|
	.count		= ATOMIC_INIT(1),
 | 
						|
	.proc_iops	= &proc_root_inode_operations, 
 | 
						|
	.proc_fops	= &proc_root_operations,
 | 
						|
	.parent		= &proc_root,
 | 
						|
};
 | 
						|
 | 
						|
int pid_ns_prepare_proc(struct pid_namespace *ns)
 | 
						|
{
 | 
						|
	struct vfsmount *mnt;
 | 
						|
 | 
						|
	mnt = kern_mount_data(&proc_fs_type, ns);
 | 
						|
	if (IS_ERR(mnt))
 | 
						|
		return PTR_ERR(mnt);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
void pid_ns_release_proc(struct pid_namespace *ns)
 | 
						|
{
 | 
						|
	mntput(ns->proc_mnt);
 | 
						|
}
 | 
						|
 | 
						|
/* Entry creation and removal helpers. */
EXPORT_SYMBOL(proc_symlink);
EXPORT_SYMBOL(proc_mkdir);
EXPORT_SYMBOL(create_proc_entry);
EXPORT_SYMBOL(proc_create);
EXPORT_SYMBOL(remove_proc_entry);

/* The root entry and the directory entries set up in proc_root_init(). */
EXPORT_SYMBOL(proc_root);
EXPORT_SYMBOL(proc_root_fs);
EXPORT_SYMBOL(proc_bus);
EXPORT_SYMBOL(proc_root_driver);
 |