mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	[PATCH] cpusets: confine oom_killer to mem_exclusive cpuset
Now the real motivation for this cpuset mem_exclusive patch series seems trivial.

This patch keeps a task in or under one mem_exclusive cpuset from provoking an oom kill of a task under a non-overlapping mem_exclusive cpuset. Since only interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive containment, there is little to gain from oom killing a task under a non-overlapping mem_exclusive cpuset, as almost all kernel and user memory allocation must come from disjoint memory nodes.

This patch enables configuring a system so that a runaway job under one mem_exclusive cpuset cannot cause the killing of a job in another such cpuset that might be using very high compute and memory resources for a prolonged time.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
		
							parent
							
								
									9bf2229f88
								
							
						
					
					
						commit
						ef08e3b498
					
				
					 3 changed files with 44 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void);
 | 
			
		|||
void cpuset_restrict_to_mems_allowed(unsigned long *nodes);
 | 
			
		||||
int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 | 
			
		||||
extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask);
 | 
			
		||||
extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
 | 
			
		||||
extern struct file_operations proc_cpuset_operations;
 | 
			
		||||
extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z,
 | 
			
		|||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Inline fallback for cpuset_excl_nodes_overlap(): ignores @p and always
 * returns 1, i.e. every task is reported as overlapping.
 */
static inline int cpuset_excl_nodes_overlap(const struct task_struct *p)
{
	return 1;
}
 | 
			
		||||
 | 
			
		||||
static inline char *cpuset_task_status_allowed(struct task_struct *task,
 | 
			
		||||
							char *buffer)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1688,6 +1688,39 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 | 
			
		|||
	return allowed;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
 | 
			
		||||
 * @p: pointer to task_struct of some other task.
 | 
			
		||||
 *
 | 
			
		||||
 * Description: Return true if the nearest mem_exclusive ancestor
 | 
			
		||||
 * cpusets of tasks @p and current overlap.  Used by oom killer to
 | 
			
		||||
 * determine if task @p's memory usage might impact the memory
 | 
			
		||||
 * available to the current task.
 | 
			
		||||
 *
 | 
			
		||||
 * Acquires cpuset_sem - not suitable for calling from a fast path.
 | 
			
		||||
 **/
 | 
			
		||||
 | 
			
		||||
int cpuset_excl_nodes_overlap(const struct task_struct *p)
 | 
			
		||||
{
 | 
			
		||||
	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 | 
			
		||||
	int overlap = 0;		/* do cpusets overlap? */
 | 
			
		||||
 | 
			
		||||
	down(&cpuset_sem);
 | 
			
		||||
	cs1 = current->cpuset;
 | 
			
		||||
	if (!cs1)
 | 
			
		||||
		goto done;		/* current task exiting */
 | 
			
		||||
	cs2 = p->cpuset;
 | 
			
		||||
	if (!cs2)
 | 
			
		||||
		goto done;		/* task p is exiting */
 | 
			
		||||
	cs1 = nearest_exclusive_ancestor(cs1);
 | 
			
		||||
	cs2 = nearest_exclusive_ancestor(cs2);
 | 
			
		||||
	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 | 
			
		||||
done:
 | 
			
		||||
	up(&cpuset_sem);
 | 
			
		||||
 | 
			
		||||
	return overlap;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * proc_cpuset_show()
 | 
			
		||||
 *  - Print tasks cpuset path into seq_file.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,6 +20,7 @@
 | 
			
		|||
#include <linux/swap.h>
 | 
			
		||||
#include <linux/timex.h>
 | 
			
		||||
#include <linux/jiffies.h>
 | 
			
		||||
#include <linux/cpuset.h>
 | 
			
		||||
 | 
			
		||||
/* #define DEBUG */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void)
 | 
			
		|||
			continue;
 | 
			
		||||
		if (p->oomkilladj == OOM_DISABLE)
 | 
			
		||||
			continue;
 | 
			
		||||
		/* If p's nodes don't overlap ours, it won't help to kill p. */
 | 
			
		||||
		if (!cpuset_excl_nodes_overlap(p))
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * This is in the process of releasing memory so wait for it
 | 
			
		||||
		 * to finish before killing some other task by mistake.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue