mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	oom: make oom_score a per-process value
The oom-killer kills a process, not a task, so oom_score should be calculated per-process too. This makes the score more consistent and speeds up select_bad_process(). Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Paul Menage <menage@google.com> Cc: David Rientjes <rientjes@google.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									28b83c5193
								
							
						
					
					
						commit
						495789a51a
					
				
					 3 changed files with 31 additions and 8 deletions
				
			
		| 
						 | 
					@ -1205,7 +1205,7 @@ The following heuristics are then applied:
 | 
				
			||||||
 * if the task was reniced, its score doubles
 | 
					 * if the task was reniced, its score doubles
 | 
				
			||||||
 * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
 | 
					 * superuser or direct hardware access tasks (CAP_SYS_ADMIN, CAP_SYS_RESOURCE
 | 
				
			||||||
 	or CAP_SYS_RAWIO) have their score divided by 4
 | 
					 	or CAP_SYS_RAWIO) have their score divided by 4
 | 
				
			||||||
 * if oom condition happened in one cpuset and checked task does not belong
 | 
					 * if oom condition happened in one cpuset and checked process does not belong
 | 
				
			||||||
 	to it, its score is divided by 8
 | 
					 	to it, its score is divided by 8
 | 
				
			||||||
 * the resulting score is multiplied by two to the power of oom_adj, i.e.
 | 
					 * the resulting score is multiplied by two to the power of oom_adj, i.e.
 | 
				
			||||||
	points <<= oom_adj when it is positive and
 | 
						points <<= oom_adj when it is positive and
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	do_posix_clock_monotonic_gettime(&uptime);
 | 
						do_posix_clock_monotonic_gettime(&uptime);
 | 
				
			||||||
	read_lock(&tasklist_lock);
 | 
						read_lock(&tasklist_lock);
 | 
				
			||||||
	points = badness(task, uptime.tv_sec);
 | 
						points = badness(task->group_leader, uptime.tv_sec);
 | 
				
			||||||
	read_unlock(&tasklist_lock);
 | 
						read_unlock(&tasklist_lock);
 | 
				
			||||||
	return sprintf(buffer, "%lu\n", points);
 | 
						return sprintf(buffer, "%lu\n", points);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -34,6 +34,23 @@ int sysctl_oom_dump_tasks;
 | 
				
			||||||
static DEFINE_SPINLOCK(zone_scan_lock);
 | 
					static DEFINE_SPINLOCK(zone_scan_lock);
 | 
				
			||||||
/* #define DEBUG */
 | 
					/* #define DEBUG */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Does any thread of the target process have allowed memory nodes that overlap ours?
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int has_intersects_mems_allowed(struct task_struct *tsk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct task_struct *t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						t = tsk;
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							if (cpuset_mems_allowed_intersects(current, t))
 | 
				
			||||||
 | 
								return 1;
 | 
				
			||||||
 | 
							t = next_thread(t);
 | 
				
			||||||
 | 
						} while (t != tsk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * badness - calculate a numeric value for how bad this task has been
 | 
					 * badness - calculate a numeric value for how bad this task has been
 | 
				
			||||||
 * @p: task struct of which task we should calculate
 | 
					 * @p: task struct of which task we should calculate
 | 
				
			||||||
| 
						 | 
					@ -59,6 +76,9 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 | 
				
			||||||
	struct mm_struct *mm;
 | 
						struct mm_struct *mm;
 | 
				
			||||||
	struct task_struct *child;
 | 
						struct task_struct *child;
 | 
				
			||||||
	int oom_adj = p->signal->oom_adj;
 | 
						int oom_adj = p->signal->oom_adj;
 | 
				
			||||||
 | 
						struct task_cputime task_time;
 | 
				
			||||||
 | 
						unsigned long utime;
 | 
				
			||||||
 | 
						unsigned long stime;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (oom_adj == OOM_DISABLE)
 | 
						if (oom_adj == OOM_DISABLE)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
| 
						 | 
					@ -106,8 +126,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 | 
				
			||||||
         * of seconds. There is no particular reason for this other than
 | 
					         * of seconds. There is no particular reason for this other than
 | 
				
			||||||
         * that it turned out to work very well in practice.
 | 
					         * that it turned out to work very well in practice.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime))
 | 
						thread_group_cputime(p, &task_time);
 | 
				
			||||||
		>> (SHIFT_HZ + 3);
 | 
						utime = cputime_to_jiffies(task_time.utime);
 | 
				
			||||||
 | 
						stime = cputime_to_jiffies(task_time.stime);
 | 
				
			||||||
 | 
						cpu_time = (utime + stime) >> (SHIFT_HZ + 3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (uptime >= p->start_time.tv_sec)
 | 
						if (uptime >= p->start_time.tv_sec)
 | 
				
			||||||
		run_time = (uptime - p->start_time.tv_sec) >> 10;
 | 
							run_time = (uptime - p->start_time.tv_sec) >> 10;
 | 
				
			||||||
| 
						 | 
					@ -148,7 +171,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 | 
				
			||||||
	 * because p may have allocated or otherwise mapped memory on
 | 
						 * because p may have allocated or otherwise mapped memory on
 | 
				
			||||||
	 * this node before. However it will be less likely.
 | 
						 * this node before. However it will be less likely.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (!cpuset_mems_allowed_intersects(current, p))
 | 
						if (!has_intersects_mems_allowed(p))
 | 
				
			||||||
		points /= 8;
 | 
							points /= 8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -204,13 +227,13 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 | 
				
			||||||
static struct task_struct *select_bad_process(unsigned long *ppoints,
 | 
					static struct task_struct *select_bad_process(unsigned long *ppoints,
 | 
				
			||||||
						struct mem_cgroup *mem)
 | 
											struct mem_cgroup *mem)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct task_struct *g, *p;
 | 
						struct task_struct *p;
 | 
				
			||||||
	struct task_struct *chosen = NULL;
 | 
						struct task_struct *chosen = NULL;
 | 
				
			||||||
	struct timespec uptime;
 | 
						struct timespec uptime;
 | 
				
			||||||
	*ppoints = 0;
 | 
						*ppoints = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	do_posix_clock_monotonic_gettime(&uptime);
 | 
						do_posix_clock_monotonic_gettime(&uptime);
 | 
				
			||||||
	do_each_thread(g, p) {
 | 
						for_each_process(p) {
 | 
				
			||||||
		unsigned long points;
 | 
							unsigned long points;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
| 
						 | 
					@ -263,7 +286,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 | 
				
			||||||
			chosen = p;
 | 
								chosen = p;
 | 
				
			||||||
			*ppoints = points;
 | 
								*ppoints = points;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	} while_each_thread(g, p);
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return chosen;
 | 
						return chosen;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue