forked from mirrors/linux
		
	[PATCH] Implement sys_* do_* layering in the memory policy layer.
- Do a separation between do_xxx and sys_xxx functions. sys_xxx functions take variable sized bitmaps from user space as arguments. do_xxx functions take fixed sized nodemask_t as arguments and may be used from inside the kernel. Doing so simplifies the initialization code. There is no fs = kernel_ds assumption anymore.
- Split up get_nodes into get_nodes (which gets the node list) and contextualize_policy which restricts the nodes to those accessible to the task and updates cpusets.
- Add comments explaining limitations of bind policy.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
		
							parent
							
								
									bb7e7e032d
								
							
						
					
					
						commit
						8bccd85ffb
					
				
					 1 changed files with 162 additions and 114 deletions
				
			
		
							
								
								
									
										276
									
								
								mm/mempolicy.c
									
									
									
									
									
								
							
							
						
						
									
										276
									
								
								mm/mempolicy.c
									
									
									
									
									
								
							| 
						 | 
					@ -2,6 +2,7 @@
 | 
				
			||||||
 * Simple NUMA memory policy for the Linux kernel.
 | 
					 * Simple NUMA memory policy for the Linux kernel.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
 | 
					 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
 | 
				
			||||||
 | 
					 * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
 | 
				
			||||||
 * Subject to the GNU Public License, version 2.
 | 
					 * Subject to the GNU Public License, version 2.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * NUMA policy allows the user to give hints in which node(s) memory should
 | 
					 * NUMA policy allows the user to give hints in which node(s) memory should
 | 
				
			||||||
| 
						 | 
					@ -17,13 +18,19 @@
 | 
				
			||||||
 *                offset into the backing object or offset into the mapping
 | 
					 *                offset into the backing object or offset into the mapping
 | 
				
			||||||
 *                for anonymous memory. For process policy an process counter
 | 
					 *                for anonymous memory. For process policy an process counter
 | 
				
			||||||
 *                is used.
 | 
					 *                is used.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * bind           Only allocate memory on a specific set of nodes,
 | 
					 * bind           Only allocate memory on a specific set of nodes,
 | 
				
			||||||
 *                no fallback.
 | 
					 *                no fallback.
 | 
				
			||||||
 | 
					 *                FIXME: memory is allocated starting with the first node
 | 
				
			||||||
 | 
					 *                to the last. It would be better if bind would truly restrict
 | 
				
			||||||
 | 
					 *                the allocation to memory nodes instead
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * preferred       Try a specific node first before normal fallback.
 | 
					 * preferred       Try a specific node first before normal fallback.
 | 
				
			||||||
 *                As a special case node -1 here means do the allocation
 | 
					 *                As a special case node -1 here means do the allocation
 | 
				
			||||||
 *                on the local CPU. This is normally identical to default,
 | 
					 *                on the local CPU. This is normally identical to default,
 | 
				
			||||||
 *                but useful to set in a VMA when you have a non default
 | 
					 *                but useful to set in a VMA when you have a non default
 | 
				
			||||||
 *                process policy.
 | 
					 *                process policy.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 * default        Allocate on the local node first, or when on a VMA
 | 
					 * default        Allocate on the local node first, or when on a VMA
 | 
				
			||||||
 *                use the process policy. This is what Linux always did
 | 
					 *                use the process policy. This is what Linux always did
 | 
				
			||||||
 *		  in a NUMA aware kernel and still does by, ahem, default.
 | 
					 *		  in a NUMA aware kernel and still does by, ahem, default.
 | 
				
			||||||
| 
						 | 
					@ -113,56 +120,6 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
 | 
						return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Copy a node mask from user space. */
 | 
					 | 
				
			||||||
static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask,
 | 
					 | 
				
			||||||
		     unsigned long maxnode, int mode)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long k;
 | 
					 | 
				
			||||||
	unsigned long nlongs;
 | 
					 | 
				
			||||||
	unsigned long endmask;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	--maxnode;
 | 
					 | 
				
			||||||
	nodes_clear(*nodes);
 | 
					 | 
				
			||||||
	if (maxnode == 0 || !nmask)
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	nlongs = BITS_TO_LONGS(maxnode);
 | 
					 | 
				
			||||||
	if ((maxnode % BITS_PER_LONG) == 0)
 | 
					 | 
				
			||||||
		endmask = ~0UL;
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
		endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* When the user specified more nodes than supported just check
 | 
					 | 
				
			||||||
	   if the non supported part is all zero. */
 | 
					 | 
				
			||||||
	if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
 | 
					 | 
				
			||||||
		if (nlongs > PAGE_SIZE/sizeof(long))
 | 
					 | 
				
			||||||
			return -EINVAL;
 | 
					 | 
				
			||||||
		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
 | 
					 | 
				
			||||||
			unsigned long t;
 | 
					 | 
				
			||||||
			if (get_user(t, nmask + k))
 | 
					 | 
				
			||||||
				return -EFAULT;
 | 
					 | 
				
			||||||
			if (k == nlongs - 1) {
 | 
					 | 
				
			||||||
				if (t & endmask)
 | 
					 | 
				
			||||||
					return -EINVAL;
 | 
					 | 
				
			||||||
			} else if (t)
 | 
					 | 
				
			||||||
				return -EINVAL;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		nlongs = BITS_TO_LONGS(MAX_NUMNODES);
 | 
					 | 
				
			||||||
		endmask = ~0UL;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
 | 
					 | 
				
			||||||
		return -EFAULT;
 | 
					 | 
				
			||||||
	nodes_addr(*nodes)[nlongs-1] &= endmask;
 | 
					 | 
				
			||||||
	/* Update current mems_allowed */
 | 
					 | 
				
			||||||
	cpuset_update_current_mems_allowed();
 | 
					 | 
				
			||||||
	/* Ignore nodes not set in current->mems_allowed */
 | 
					 | 
				
			||||||
	/* AK: shouldn't this error out instead? */
 | 
					 | 
				
			||||||
	cpuset_restrict_to_mems_allowed(nodes_addr(*nodes));
 | 
					 | 
				
			||||||
	return mpol_check_policy(mode, nodes);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Generate a custom zonelist for the BIND policy. */
 | 
					/* Generate a custom zonelist for the BIND policy. */
 | 
				
			||||||
static struct zonelist *bind_zonelist(nodemask_t *nodes)
 | 
					static struct zonelist *bind_zonelist(nodemask_t *nodes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -380,17 +337,25 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start,
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Change policy for a memory range */
 | 
					static int contextualize_policy(int mode, nodemask_t *nodes)
 | 
				
			||||||
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 | 
					{
 | 
				
			||||||
			  unsigned long mode,
 | 
						if (!nodes)
 | 
				
			||||||
			  unsigned long __user *nmask, unsigned long maxnode,
 | 
							return 0;
 | 
				
			||||||
			  unsigned flags)
 | 
					
 | 
				
			||||||
 | 
						/* Update current mems_allowed */
 | 
				
			||||||
 | 
						cpuset_update_current_mems_allowed();
 | 
				
			||||||
 | 
						/* Ignore nodes not set in current->mems_allowed */
 | 
				
			||||||
 | 
						cpuset_restrict_to_mems_allowed(nodes->bits);
 | 
				
			||||||
 | 
						return mpol_check_policy(mode, nodes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					long do_mbind(unsigned long start, unsigned long len,
 | 
				
			||||||
 | 
							unsigned long mode, nodemask_t *nmask, unsigned long flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct vm_area_struct *vma;
 | 
						struct vm_area_struct *vma;
 | 
				
			||||||
	struct mm_struct *mm = current->mm;
 | 
						struct mm_struct *mm = current->mm;
 | 
				
			||||||
	struct mempolicy *new;
 | 
						struct mempolicy *new;
 | 
				
			||||||
	unsigned long end;
 | 
						unsigned long end;
 | 
				
			||||||
	nodemask_t nodes;
 | 
					 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
 | 
						if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
 | 
				
			||||||
| 
						 | 
					@ -405,12 +370,9 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	if (end == start)
 | 
						if (end == start)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
						if (contextualize_policy(mode, nmask))
 | 
				
			||||||
	err = get_nodes(&nodes, nmask, maxnode, mode);
 | 
							return -EINVAL;
 | 
				
			||||||
	if (err)
 | 
						new = mpol_new(mode, nmask);
 | 
				
			||||||
		return err;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	new = mpol_new(mode, &nodes);
 | 
					 | 
				
			||||||
	if (IS_ERR(new))
 | 
						if (IS_ERR(new))
 | 
				
			||||||
		return PTR_ERR(new);
 | 
							return PTR_ERR(new);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -418,7 +380,7 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 | 
				
			||||||
			mode,nodes_addr(nodes)[0]);
 | 
								mode,nodes_addr(nodes)[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	down_write(&mm->mmap_sem);
 | 
						down_write(&mm->mmap_sem);
 | 
				
			||||||
	vma = check_range(mm, start, end, &nodes, flags);
 | 
						vma = check_range(mm, start, end, nmask, flags);
 | 
				
			||||||
	err = PTR_ERR(vma);
 | 
						err = PTR_ERR(vma);
 | 
				
			||||||
	if (!IS_ERR(vma))
 | 
						if (!IS_ERR(vma))
 | 
				
			||||||
		err = mbind_range(vma, start, end, new);
 | 
							err = mbind_range(vma, start, end, new);
 | 
				
			||||||
| 
						 | 
					@ -428,19 +390,13 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Set the process memory policy */
 | 
					/* Set the process memory policy */
 | 
				
			||||||
asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
 | 
					long do_set_mempolicy(int mode, nodemask_t *nodes)
 | 
				
			||||||
				   unsigned long maxnode)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int err;
 | 
					 | 
				
			||||||
	struct mempolicy *new;
 | 
						struct mempolicy *new;
 | 
				
			||||||
	nodemask_t nodes;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (mode < 0 || mode > MPOL_MAX)
 | 
						if (contextualize_policy(mode, nodes))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	err = get_nodes(&nodes, nmask, maxnode, mode);
 | 
						new = mpol_new(mode, nodes);
 | 
				
			||||||
	if (err)
 | 
					 | 
				
			||||||
		return err;
 | 
					 | 
				
			||||||
	new = mpol_new(mode, &nodes);
 | 
					 | 
				
			||||||
	if (IS_ERR(new))
 | 
						if (IS_ERR(new))
 | 
				
			||||||
		return PTR_ERR(new);
 | 
							return PTR_ERR(new);
 | 
				
			||||||
	mpol_free(current->mempolicy);
 | 
						mpol_free(current->mempolicy);
 | 
				
			||||||
| 
						 | 
					@ -459,7 +415,8 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 | 
				
			||||||
	switch (p->policy) {
 | 
						switch (p->policy) {
 | 
				
			||||||
	case MPOL_BIND:
 | 
						case MPOL_BIND:
 | 
				
			||||||
		for (i = 0; p->v.zonelist->zones[i]; i++)
 | 
							for (i = 0; p->v.zonelist->zones[i]; i++)
 | 
				
			||||||
			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, *nodes);
 | 
								node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id,
 | 
				
			||||||
 | 
									*nodes);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	case MPOL_DEFAULT:
 | 
						case MPOL_DEFAULT:
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
| 
						 | 
					@ -491,38 +448,17 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Copy a kernel node mask to user space */
 | 
					 | 
				
			||||||
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 | 
					 | 
				
			||||||
			      nodemask_t *nodes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
 | 
					 | 
				
			||||||
	const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (copy > nbytes) {
 | 
					 | 
				
			||||||
		if (copy > PAGE_SIZE)
 | 
					 | 
				
			||||||
			return -EINVAL;
 | 
					 | 
				
			||||||
		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
 | 
					 | 
				
			||||||
			return -EFAULT;
 | 
					 | 
				
			||||||
		copy = nbytes;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Retrieve NUMA policy */
 | 
					/* Retrieve NUMA policy */
 | 
				
			||||||
asmlinkage long sys_get_mempolicy(int __user *policy,
 | 
					long do_get_mempolicy(int *policy, nodemask_t *nmask,
 | 
				
			||||||
				  unsigned long __user *nmask,
 | 
								unsigned long addr, unsigned long flags)
 | 
				
			||||||
				  unsigned long maxnode,
 | 
					 | 
				
			||||||
				  unsigned long addr, unsigned long flags)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int err, pval;
 | 
						int err;
 | 
				
			||||||
	struct mm_struct *mm = current->mm;
 | 
						struct mm_struct *mm = current->mm;
 | 
				
			||||||
	struct vm_area_struct *vma = NULL;
 | 
						struct vm_area_struct *vma = NULL;
 | 
				
			||||||
	struct mempolicy *pol = current->mempolicy;
 | 
						struct mempolicy *pol = current->mempolicy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 | 
						if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	if (nmask != NULL && maxnode < MAX_NUMNODES)
 | 
					 | 
				
			||||||
		return -EINVAL;
 | 
					 | 
				
			||||||
	if (flags & MPOL_F_ADDR) {
 | 
						if (flags & MPOL_F_ADDR) {
 | 
				
			||||||
		down_read(&mm->mmap_sem);
 | 
							down_read(&mm->mmap_sem);
 | 
				
			||||||
		vma = find_vma_intersection(mm, addr, addr+1);
 | 
							vma = find_vma_intersection(mm, addr, addr+1);
 | 
				
			||||||
| 
						 | 
					@ -545,31 +481,25 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 | 
				
			||||||
			err = lookup_node(mm, addr);
 | 
								err = lookup_node(mm, addr);
 | 
				
			||||||
			if (err < 0)
 | 
								if (err < 0)
 | 
				
			||||||
				goto out;
 | 
									goto out;
 | 
				
			||||||
			pval = err;
 | 
								*policy = err;
 | 
				
			||||||
		} else if (pol == current->mempolicy &&
 | 
							} else if (pol == current->mempolicy &&
 | 
				
			||||||
				pol->policy == MPOL_INTERLEAVE) {
 | 
									pol->policy == MPOL_INTERLEAVE) {
 | 
				
			||||||
			pval = current->il_next;
 | 
								*policy = current->il_next;
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			err = -EINVAL;
 | 
								err = -EINVAL;
 | 
				
			||||||
			goto out;
 | 
								goto out;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	} else
 | 
						} else
 | 
				
			||||||
		pval = pol->policy;
 | 
							*policy = pol->policy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (vma) {
 | 
						if (vma) {
 | 
				
			||||||
		up_read(&current->mm->mmap_sem);
 | 
							up_read(&current->mm->mmap_sem);
 | 
				
			||||||
		vma = NULL;
 | 
							vma = NULL;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (policy && put_user(pval, policy))
 | 
					 | 
				
			||||||
		return -EFAULT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	err = 0;
 | 
						err = 0;
 | 
				
			||||||
	if (nmask) {
 | 
						if (nmask)
 | 
				
			||||||
		nodemask_t nodes;
 | 
							get_zonemask(pol, nmask);
 | 
				
			||||||
		get_zonemask(pol, &nodes);
 | 
					 | 
				
			||||||
		err = copy_nodes_to_user(nmask, maxnode, &nodes);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 out:
 | 
					 out:
 | 
				
			||||||
	if (vma)
 | 
						if (vma)
 | 
				
			||||||
| 
						 | 
					@ -577,6 +507,126 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 | 
				
			||||||
	return err;
 | 
						return err;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * User space interface with variable sized bitmaps for nodelists.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Copy a node mask from user space. */
 | 
				
			||||||
 | 
					static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask,
 | 
				
			||||||
 | 
							     unsigned long maxnode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long k;
 | 
				
			||||||
 | 
						unsigned long nlongs;
 | 
				
			||||||
 | 
						unsigned long endmask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						--maxnode;
 | 
				
			||||||
 | 
						nodes_clear(*nodes);
 | 
				
			||||||
 | 
						if (maxnode == 0 || !nmask)
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						nlongs = BITS_TO_LONGS(maxnode);
 | 
				
			||||||
 | 
						if ((maxnode % BITS_PER_LONG) == 0)
 | 
				
			||||||
 | 
							endmask = ~0UL;
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* When the user specified more nodes than supported just check
 | 
				
			||||||
 | 
						   if the non supported part is all zero. */
 | 
				
			||||||
 | 
						if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
 | 
				
			||||||
 | 
							if (nlongs > PAGE_SIZE/sizeof(long))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
 | 
				
			||||||
 | 
								unsigned long t;
 | 
				
			||||||
 | 
								if (get_user(t, nmask + k))
 | 
				
			||||||
 | 
									return -EFAULT;
 | 
				
			||||||
 | 
								if (k == nlongs - 1) {
 | 
				
			||||||
 | 
									if (t & endmask)
 | 
				
			||||||
 | 
										return -EINVAL;
 | 
				
			||||||
 | 
								} else if (t)
 | 
				
			||||||
 | 
									return -EINVAL;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							nlongs = BITS_TO_LONGS(MAX_NUMNODES);
 | 
				
			||||||
 | 
							endmask = ~0UL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
 | 
				
			||||||
 | 
							return -EFAULT;
 | 
				
			||||||
 | 
						nodes_addr(*nodes)[nlongs-1] &= endmask;
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Copy a kernel node mask to user space */
 | 
				
			||||||
 | 
					static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 | 
				
			||||||
 | 
								      nodemask_t *nodes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long copy = ALIGN(maxnode-1, 64) / 8;
 | 
				
			||||||
 | 
						const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (copy > nbytes) {
 | 
				
			||||||
 | 
							if (copy > PAGE_SIZE)
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							if (clear_user((char __user *)mask + nbytes, copy - nbytes))
 | 
				
			||||||
 | 
								return -EFAULT;
 | 
				
			||||||
 | 
							copy = nbytes;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 | 
				
			||||||
 | 
								unsigned long mode,
 | 
				
			||||||
 | 
								unsigned long __user *nmask, unsigned long maxnode,
 | 
				
			||||||
 | 
								unsigned flags)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						nodemask_t nodes;
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = get_nodes(&nodes, nmask, maxnode);
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
						return do_mbind(start, len, mode, &nodes, flags);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Set the process memory policy */
 | 
				
			||||||
 | 
					asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
 | 
				
			||||||
 | 
							unsigned long maxnode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int err;
 | 
				
			||||||
 | 
						nodemask_t nodes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (mode < 0 || mode > MPOL_MAX)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						err = get_nodes(&nodes, nmask, maxnode);
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
						return do_set_mempolicy(mode, &nodes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Retrieve NUMA policy */
 | 
				
			||||||
 | 
					asmlinkage long sys_get_mempolicy(int __user *policy,
 | 
				
			||||||
 | 
									unsigned long __user *nmask,
 | 
				
			||||||
 | 
									unsigned long maxnode,
 | 
				
			||||||
 | 
									unsigned long addr, unsigned long flags)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int err, pval;
 | 
				
			||||||
 | 
						nodemask_t nodes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (nmask != NULL && maxnode < MAX_NUMNODES)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = do_get_mempolicy(&pval, &nodes, addr, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							return err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (policy && put_user(pval, policy))
 | 
				
			||||||
 | 
							return -EFAULT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (nmask)
 | 
				
			||||||
 | 
							err = copy_nodes_to_user(nmask, maxnode, &nodes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return err;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_COMPAT
 | 
					#ifdef CONFIG_COMPAT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 | 
					asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 | 
				
			||||||
| 
						 | 
					@ -664,7 +714,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (vma) {
 | 
						if (vma) {
 | 
				
			||||||
		if (vma->vm_ops && vma->vm_ops->get_policy)
 | 
							if (vma->vm_ops && vma->vm_ops->get_policy)
 | 
				
			||||||
		        pol = vma->vm_ops->get_policy(vma, addr);
 | 
								pol = vma->vm_ops->get_policy(vma, addr);
 | 
				
			||||||
		else if (vma->vm_policy &&
 | 
							else if (vma->vm_policy &&
 | 
				
			||||||
				vma->vm_policy->policy != MPOL_DEFAULT)
 | 
									vma->vm_policy->policy != MPOL_DEFAULT)
 | 
				
			||||||
			pol = vma->vm_policy;
 | 
								pol = vma->vm_policy;
 | 
				
			||||||
| 
						 | 
					@ -1147,14 +1197,12 @@ void __init numa_policy_init(void)
 | 
				
			||||||
	/* Set interleaving policy for system init. This way not all
 | 
						/* Set interleaving policy for system init. This way not all
 | 
				
			||||||
	   the data structures allocated at system boot end up in node zero. */
 | 
						   the data structures allocated at system boot end up in node zero. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (sys_set_mempolicy(MPOL_INTERLEAVE, nodes_addr(node_online_map),
 | 
						if (do_set_mempolicy(MPOL_INTERLEAVE, &node_online_map))
 | 
				
			||||||
							MAX_NUMNODES) < 0)
 | 
					 | 
				
			||||||
		printk("numa_policy_init: interleaving failed\n");
 | 
							printk("numa_policy_init: interleaving failed\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Reset policy of current process to default.
 | 
					/* Reset policy of current process to default */
 | 
				
			||||||
 * Assumes fs == KERNEL_DS */
 | 
					 | 
				
			||||||
void numa_default_policy(void)
 | 
					void numa_default_policy(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	sys_set_mempolicy(MPOL_DEFAULT, NULL, 0);
 | 
						do_set_mempolicy(MPOL_DEFAULT, NULL);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue