mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	sysctl: Improve the sysctl sanity checks
- Stop validating subdirectories now that we only register leaf tables
- Cleanup and improve the duplicate filename check.
  * Run the duplicate filename check under the sysctl_lock to guarantee
    we never add duplicate names.
  * Reduce the duplicate filename check to nearly O(M*N) where M is the
    number of entries in tthe table we are registering and N is the
    number of entries in the directory before we got there.
- Move the duplicate filename check into it's own function and call
  it directtly from __register_sysctl_table
- Kill the config option as the sanity checks are now cheap enough
  the config option is unnecessary. The original reason for the config
  option was because we had a huge table used to verify the proc filename
  to binary sysctl mapping.  That table has now evolved into the binary_sysctl
  translation layer and is no longer part of the sysctl_check code.
- Tighten up the permission checks.  Guarnateeing that files only have read
  or write permissions.
- Removed redudant check for parents having a procname as now everything has
  a procname.
- Generalize the backtrace logic so that we print a backtrace from
  any failure of __register_sysctl_table that was not caused by
  a memmory allocation failure.  The backtrace allows us to track
  down who erroneously registered a sysctl table.
Bechmark before (CONFIG_SYSCTL_CHECK=y):
    make-dummies 0 999 -> 12s
    rmmod dummy        -> 0.08s
Bechmark before (CONFIG_SYSCTL_CHECK=n):
    make-dummies 0 999 -> 0.7s
    rmmod dummy        -> 0.06s
    make-dummies 0 99999 -> 1m13s
    rmmod dummy          -> 0.38s
Benchmark after:
    make-dummies 0 999 -> 0.65s
    rmmod dummy        -> 0.055s
    make-dummies 0 9999 -> 1m10s
    rmmod dummy         -> 0.39s
The sysctl sanity checks now impose no measurable cost.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
			
			
This commit is contained in:
		
							parent
							
								
									f728019bb7
								
							
						
					
					
						commit
						7c60c48f58
					
				
					 2 changed files with 102 additions and 160 deletions
				
			
		| 
						 | 
				
			
			@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
 | 
			
		||||
static int sysctl_depth(struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table *tmp;
 | 
			
		||||
	int depth;
 | 
			
		||||
 | 
			
		||||
	depth = 0;
 | 
			
		||||
	for (tmp = table; tmp->parent; tmp = tmp->parent)
 | 
			
		||||
		depth++;
 | 
			
		||||
 | 
			
		||||
	return depth;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; table && i < n; i++)
 | 
			
		||||
		table = table->parent;
 | 
			
		||||
 | 
			
		||||
	return table;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static void sysctl_print_path(struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table *tmp;
 | 
			
		||||
	int depth, i;
 | 
			
		||||
	depth = sysctl_depth(table);
 | 
			
		||||
	if (table->procname) {
 | 
			
		||||
		for (i = depth; i >= 0; i--) {
 | 
			
		||||
			tmp = sysctl_parent(table, i);
 | 
			
		||||
			printk("/%s", tmp->procname?tmp->procname:"");
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	printk(" ");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
 | 
			
		||||
						struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table_header *head;
 | 
			
		||||
	struct ctl_table *ref, *test;
 | 
			
		||||
	int depth, cur_depth;
 | 
			
		||||
 | 
			
		||||
	depth = sysctl_depth(table);
 | 
			
		||||
 | 
			
		||||
	for (head = __sysctl_head_next(namespaces, NULL); head;
 | 
			
		||||
	     head = __sysctl_head_next(namespaces, head)) {
 | 
			
		||||
		cur_depth = depth;
 | 
			
		||||
		ref = head->ctl_table;
 | 
			
		||||
repeat:
 | 
			
		||||
		test = sysctl_parent(table, cur_depth);
 | 
			
		||||
		for (; ref->procname; ref++) {
 | 
			
		||||
			int match = 0;
 | 
			
		||||
			if (cur_depth && !ref->child)
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			if (test->procname && ref->procname &&
 | 
			
		||||
			    (strcmp(test->procname, ref->procname) == 0))
 | 
			
		||||
					match++;
 | 
			
		||||
 | 
			
		||||
			if (match) {
 | 
			
		||||
				if (cur_depth != 0) {
 | 
			
		||||
					cur_depth--;
 | 
			
		||||
					ref = ref->child;
 | 
			
		||||
					goto repeat;
 | 
			
		||||
				}
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	ref = NULL;
 | 
			
		||||
out:
 | 
			
		||||
	sysctl_head_finish(head);
 | 
			
		||||
	return ref;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void set_fail(const char **fail, struct ctl_table *table, const char *str)
 | 
			
		||||
{
 | 
			
		||||
	if (*fail) {
 | 
			
		||||
		printk(KERN_ERR "sysctl table check failed: ");
 | 
			
		||||
		sysctl_print_path(table);
 | 
			
		||||
		printk(" %s\n", *fail);
 | 
			
		||||
		dump_stack();
 | 
			
		||||
	}
 | 
			
		||||
	*fail = str;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void sysctl_check_leaf(struct nsproxy *namespaces,
 | 
			
		||||
				struct ctl_table *table, const char **fail)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table *ref;
 | 
			
		||||
 | 
			
		||||
	ref = sysctl_check_lookup(namespaces, table);
 | 
			
		||||
	if (ref && (ref != table))
 | 
			
		||||
		set_fail(fail, table, "Sysctl already exists");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
 | 
			
		||||
static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
 | 
			
		||||
	struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table *entry, *test;
 | 
			
		||||
	int error = 0;
 | 
			
		||||
	for (; table->procname; table++) {
 | 
			
		||||
		const char *fail = NULL;
 | 
			
		||||
 | 
			
		||||
		if (table->parent) {
 | 
			
		||||
			if (!table->parent->procname)
 | 
			
		||||
				set_fail(&fail, table, "Parent without procname");
 | 
			
		||||
		}
 | 
			
		||||
		if (table->child) {
 | 
			
		||||
			if (table->data)
 | 
			
		||||
				set_fail(&fail, table, "Directory with data?");
 | 
			
		||||
			if (table->maxlen)
 | 
			
		||||
				set_fail(&fail, table, "Directory with maxlen?");
 | 
			
		||||
			if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
 | 
			
		||||
				set_fail(&fail, table, "Writable sysctl directory");
 | 
			
		||||
			if (table->proc_handler)
 | 
			
		||||
				set_fail(&fail, table, "Directory with proc_handler");
 | 
			
		||||
			if (table->extra1)
 | 
			
		||||
				set_fail(&fail, table, "Directory with extra1");
 | 
			
		||||
			if (table->extra2)
 | 
			
		||||
				set_fail(&fail, table, "Directory with extra2");
 | 
			
		||||
		} else {
 | 
			
		||||
			if ((table->proc_handler == proc_dostring) ||
 | 
			
		||||
			    (table->proc_handler == proc_dointvec) ||
 | 
			
		||||
			    (table->proc_handler == proc_dointvec_minmax) ||
 | 
			
		||||
			    (table->proc_handler == proc_dointvec_jiffies) ||
 | 
			
		||||
			    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
 | 
			
		||||
			    (table->proc_handler == proc_dointvec_ms_jiffies) ||
 | 
			
		||||
			    (table->proc_handler == proc_doulongvec_minmax) ||
 | 
			
		||||
			    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
 | 
			
		||||
				if (!table->data)
 | 
			
		||||
					set_fail(&fail, table, "No data");
 | 
			
		||||
				if (!table->maxlen)
 | 
			
		||||
					set_fail(&fail, table, "No maxlen");
 | 
			
		||||
	for (entry = old; entry->procname; entry++) {
 | 
			
		||||
		for (test = table; test->procname; test++) {
 | 
			
		||||
			if (strcmp(entry->procname, test->procname) == 0) {
 | 
			
		||||
				printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
 | 
			
		||||
					path, test->procname);
 | 
			
		||||
				error = -EEXIST;
 | 
			
		||||
			}
 | 
			
		||||
#ifdef CONFIG_PROC_SYSCTL
 | 
			
		||||
			if (!table->proc_handler)
 | 
			
		||||
				set_fail(&fail, table, "No proc_handler");
 | 
			
		||||
#endif
 | 
			
		||||
			sysctl_check_leaf(namespaces, table, &fail);
 | 
			
		||||
		}
 | 
			
		||||
		if (table->mode > 0777)
 | 
			
		||||
			set_fail(&fail, table, "bogus .mode");
 | 
			
		||||
		if (fail) {
 | 
			
		||||
			set_fail(&fail, table, NULL);
 | 
			
		||||
			error = -EINVAL;
 | 
			
		||||
		}
 | 
			
		||||
		if (table->child)
 | 
			
		||||
			error |= sysctl_check_table(namespaces, table->child);
 | 
			
		||||
	}
 | 
			
		||||
	return error;
 | 
			
		||||
}
 | 
			
		||||
#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */
 | 
			
		||||
 | 
			
		||||
static int sysctl_check_dups(struct nsproxy *namespaces,
 | 
			
		||||
	struct ctl_table_header *header,
 | 
			
		||||
	const char *path, struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	struct ctl_table_root *root;
 | 
			
		||||
	struct ctl_table_set *set;
 | 
			
		||||
	struct ctl_table_header *dir_head, *head;
 | 
			
		||||
	struct ctl_table *dir_table;
 | 
			
		||||
	int error = 0;
 | 
			
		||||
 | 
			
		||||
	/* No dups if we are the only member of our directory */
 | 
			
		||||
	if (header->attached_by != table)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	dir_head = header->parent;
 | 
			
		||||
	dir_table = header->attached_to;
 | 
			
		||||
 | 
			
		||||
	error = sysctl_check_table_dups(path, dir_table, table);
 | 
			
		||||
 | 
			
		||||
	root = &sysctl_table_root;
 | 
			
		||||
	do {
 | 
			
		||||
		set = lookup_header_set(root, namespaces);
 | 
			
		||||
 | 
			
		||||
		list_for_each_entry(head, &set->list, ctl_entry) {
 | 
			
		||||
			if (head->unregistering)
 | 
			
		||||
				continue;
 | 
			
		||||
			if (head->attached_to != dir_table)
 | 
			
		||||
				continue;
 | 
			
		||||
			error = sysctl_check_table_dups(path, head->attached_by,
 | 
			
		||||
							table);
 | 
			
		||||
		}
 | 
			
		||||
		root = list_entry(root->root_list.next,
 | 
			
		||||
				  struct ctl_table_root, root_list);
 | 
			
		||||
	} while (root != &sysctl_table_root);
 | 
			
		||||
	return error;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
 | 
			
		||||
{
 | 
			
		||||
	struct va_format vaf;
 | 
			
		||||
	va_list args;
 | 
			
		||||
 | 
			
		||||
	va_start(args, fmt);
 | 
			
		||||
	vaf.fmt = fmt;
 | 
			
		||||
	vaf.va = &args;
 | 
			
		||||
 | 
			
		||||
	printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
 | 
			
		||||
		path, table->procname, &vaf);
 | 
			
		||||
 | 
			
		||||
	va_end(args);
 | 
			
		||||
	return -EINVAL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int sysctl_check_table(const char *path, struct ctl_table *table)
 | 
			
		||||
{
 | 
			
		||||
	int err = 0;
 | 
			
		||||
	for (; table->procname; table++) {
 | 
			
		||||
		if (table->child)
 | 
			
		||||
			err = sysctl_err(path, table, "Not a file");
 | 
			
		||||
 | 
			
		||||
		if ((table->proc_handler == proc_dostring) ||
 | 
			
		||||
		    (table->proc_handler == proc_dointvec) ||
 | 
			
		||||
		    (table->proc_handler == proc_dointvec_minmax) ||
 | 
			
		||||
		    (table->proc_handler == proc_dointvec_jiffies) ||
 | 
			
		||||
		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
 | 
			
		||||
		    (table->proc_handler == proc_dointvec_ms_jiffies) ||
 | 
			
		||||
		    (table->proc_handler == proc_doulongvec_minmax) ||
 | 
			
		||||
		    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
 | 
			
		||||
			if (!table->data)
 | 
			
		||||
				err = sysctl_err(path, table, "No data");
 | 
			
		||||
			if (!table->maxlen)
 | 
			
		||||
				err = sysctl_err(path, table, "No maxlen");
 | 
			
		||||
		}
 | 
			
		||||
		if (!table->proc_handler)
 | 
			
		||||
			err = sysctl_err(path, table, "No proc_handler");
 | 
			
		||||
 | 
			
		||||
		if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
 | 
			
		||||
			err = sysctl_err(path, table, "bogus .mode 0%o",
 | 
			
		||||
				table->mode);
 | 
			
		||||
	}
 | 
			
		||||
	return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * __register_sysctl_table - register a leaf sysctl table
 | 
			
		||||
| 
						 | 
				
			
			@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
 | 
			
		|||
	header->root = root;
 | 
			
		||||
	sysctl_set_parent(NULL, header->ctl_table);
 | 
			
		||||
	header->count = 1;
 | 
			
		||||
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
 | 
			
		||||
	if (sysctl_check_table(namespaces, header->ctl_table)) {
 | 
			
		||||
		kfree(header);
 | 
			
		||||
		return NULL;
 | 
			
		||||
	}
 | 
			
		||||
#endif
 | 
			
		||||
	if (sysctl_check_table(path, table))
 | 
			
		||||
		goto fail;
 | 
			
		||||
	spin_lock(&sysctl_lock);
 | 
			
		||||
	header->set = lookup_header_set(root, namespaces);
 | 
			
		||||
	header->attached_by = header->ctl_table;
 | 
			
		||||
| 
						 | 
				
			
			@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
 | 
			
		|||
				  struct ctl_table_root, root_list);
 | 
			
		||||
		set = lookup_header_set(root, namespaces);
 | 
			
		||||
	}
 | 
			
		||||
	if (sysctl_check_dups(namespaces, header, path, table))
 | 
			
		||||
		goto fail_locked;
 | 
			
		||||
	header->parent->count++;
 | 
			
		||||
	list_add_tail(&header->ctl_entry, &header->set->list);
 | 
			
		||||
	spin_unlock(&sysctl_lock);
 | 
			
		||||
 | 
			
		||||
	return header;
 | 
			
		||||
fail_locked:
 | 
			
		||||
	spin_unlock(&sysctl_lock);
 | 
			
		||||
fail:
 | 
			
		||||
	kfree(header);
 | 
			
		||||
	dump_stack();
 | 
			
		||||
	return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static char *append_path(const char *path, char *pos, const char *name)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1113,14 +1113,6 @@ config LATENCYTOP
 | 
			
		|||
	  Enable this option if you want to use the LatencyTOP tool
 | 
			
		||||
	  to find out which userspace is blocking on what kernel operations.
 | 
			
		||||
 | 
			
		||||
config SYSCTL_SYSCALL_CHECK
 | 
			
		||||
	bool "Sysctl checks"
 | 
			
		||||
	depends on SYSCTL
 | 
			
		||||
	---help---
 | 
			
		||||
	  sys_sysctl uses binary paths that have been found challenging
 | 
			
		||||
	  to properly maintain and use. This enables checks that help
 | 
			
		||||
	  you to keep things correct.
 | 
			
		||||
 | 
			
		||||
source mm/Kconfig.debug
 | 
			
		||||
source kernel/trace/Kconfig
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue