mirror of
https://github.com/torvalds/linux.git
synced 2025-10-29 07:46:20 +02:00
Patch series "Improvements to Victim Process Thawing and OOM Reaper Traversal Order", v10. This patch series focuses on optimizing victim process thawing and refining the traversal order of the OOM reaper. Since __thaw_task() is used to thaw a single thread of the victim, thawing only one thread cannot guarantee the exit of the OOM victim when it is frozen. Patch 1 thaws the entire process of the OOM victim to ensure that OOM victims are able to terminate themselves. Even if the oom_reaper is delayed, patch 2 is still beneficial for reaping processes with a large address space footprint, and it also greatly improves process_mrelease. This patch (of 10): The OOM killer is a mechanism that selects and kills processes when the system runs out of memory to reclaim resources and keep the system stable. But the OOM victim cannot terminate on its own when it is frozen, even if the OOM victim task is thawed through __thaw_task(). This is because __thaw_task() can only thaw a single OOM victim thread, and cannot thaw the entire OOM victim process. In addition, freezing_slow_path() determines whether a task is an OOM victim by checking the task's TIF_MEMDIE flag. When a task is identified as an OOM victim, the freezer bypasses both PM freezing and cgroup freezing states to thaw it. Historically, TIF_MEMDIE was a "this is the oom victim & it has access to memory reserves" flag. It has thread vs. process problems, and tsk_is_oom_victim() was introduced later to get rid of them and other issues, as well as to guarantee that we can reliably identify the OOM victim's mm for the oom_reaper. Therefore, thaw_process() is introduced to unfreeze all threads within the OOM victim process, ensuring that every thread is properly thawed. The freezer now uses tsk_is_oom_victim() to determine OOM victim status, allowing all victim threads to be unfrozen as necessary.
With this change, the entire OOM victim process will be thawed when an OOM event occurs, ensuring that the victim can terminate on its own. Link: https://lkml.kernel.org/r/20250915162946.5515-1-zhongjinji@honor.com Link: https://lkml.kernel.org/r/20250915162946.5515-2-zhongjinji@honor.com Signed-off-by: zhongjinji <zhongjinji@honor.com> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Shakeel Butt <shakeel.butt@linux.dev> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: David Rientjes <rientjes@google.com> Cc: Len Brown <lenb@kernel.org> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
247 lines
5.9 KiB
C
247 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* kernel/freezer.c - Function to freeze a process
|
|
*
|
|
* Originally from kernel/power/process.c
|
|
*/
|
|
|
|
#include <linux/interrupt.h>
|
|
#include <linux/suspend.h>
|
|
#include <linux/export.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/freezer.h>
|
|
#include <linux/oom.h>
|
|
#include <linux/kthread.h>
|
|
|
|
/* total number of freezing conditions in effect */
|
|
DEFINE_STATIC_KEY_FALSE(freezer_active);
|
|
EXPORT_SYMBOL(freezer_active);
|
|
|
|
/*
|
|
* indicate whether PM freezing is in effect, protected by
|
|
* system_transition_mutex
|
|
*/
|
|
bool pm_freezing;
|
|
bool pm_nosig_freezing;
|
|
|
|
/* protects freezing and frozen transitions */
|
|
static DEFINE_SPINLOCK(freezer_lock);
|
|
|
|
/**
|
|
* freezing_slow_path - slow path for testing whether a task needs to be frozen
|
|
* @p: task to be tested
|
|
*
|
|
* This function is called by freezing() if freezer_active isn't zero
|
|
* and tests whether @p needs to enter and stay in frozen state. Can be
|
|
* called under any context. The freezers are responsible for ensuring the
|
|
* target tasks see the updated state.
|
|
*/
|
|
bool freezing_slow_path(struct task_struct *p)
|
|
{
|
|
if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
|
|
return false;
|
|
|
|
if (tsk_is_oom_victim(p))
|
|
return false;
|
|
|
|
if (pm_nosig_freezing || cgroup_freezing(p))
|
|
return true;
|
|
|
|
if (pm_freezing && !(p->flags & PF_KTHREAD))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(freezing_slow_path);
|
|
|
|
bool frozen(struct task_struct *p)
|
|
{
|
|
return READ_ONCE(p->__state) & TASK_FROZEN;
|
|
}
|
|
|
|
/* Refrigerator is place where frozen processes are stored :-). */
|
|
bool __refrigerator(bool check_kthr_stop)
|
|
{
|
|
unsigned int state = get_current_state();
|
|
bool was_frozen = false;
|
|
|
|
pr_debug("%s entered refrigerator\n", current->comm);
|
|
|
|
WARN_ON_ONCE(state && !(state & TASK_NORMAL));
|
|
|
|
for (;;) {
|
|
bool freeze;
|
|
|
|
raw_spin_lock_irq(¤t->pi_lock);
|
|
WRITE_ONCE(current->__state, TASK_FROZEN);
|
|
/* unstale saved_state so that __thaw_task() will wake us up */
|
|
current->saved_state = TASK_RUNNING;
|
|
raw_spin_unlock_irq(¤t->pi_lock);
|
|
|
|
spin_lock_irq(&freezer_lock);
|
|
freeze = freezing(current) && !(check_kthr_stop && kthread_should_stop());
|
|
spin_unlock_irq(&freezer_lock);
|
|
|
|
if (!freeze)
|
|
break;
|
|
|
|
was_frozen = true;
|
|
schedule();
|
|
}
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
pr_debug("%s left refrigerator\n", current->comm);
|
|
|
|
return was_frozen;
|
|
}
|
|
EXPORT_SYMBOL(__refrigerator);
|
|
|
|
/*
 * Wake @p through signal_wake_up() while holding its sighand lock.
 * Nothing is done if lock_task_sighand() fails.
 */
static void fake_signal_wake_up(struct task_struct *p)
{
	unsigned long irq_flags;

	if (!lock_task_sighand(p, &irq_flags))
		return;

	signal_wake_up(p, 0);
	unlock_task_sighand(p, &irq_flags);
}
|
|
|
|
static int __set_task_frozen(struct task_struct *p, void *arg)
|
|
{
|
|
unsigned int state = READ_ONCE(p->__state);
|
|
|
|
/*
|
|
* Allow freezing the sched_delayed tasks; they will not execute until
|
|
* ttwu() fixes them up, so it is safe to swap their state now, instead
|
|
* of waiting for them to get fully dequeued.
|
|
*/
|
|
if (task_is_runnable(p))
|
|
return 0;
|
|
|
|
if (p != current && task_curr(p))
|
|
return 0;
|
|
|
|
if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
|
|
return 0;
|
|
|
|
/*
|
|
* Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
|
|
* can suffer spurious wakeups.
|
|
*/
|
|
if (state & TASK_FREEZABLE)
|
|
WARN_ON_ONCE(!(state & TASK_NORMAL));
|
|
|
|
#ifdef CONFIG_LOCKDEP
|
|
/*
|
|
* It's dangerous to freeze with locks held; there be dragons there.
|
|
*/
|
|
if (!(state & __TASK_FREEZABLE_UNSAFE))
|
|
WARN_ON_ONCE(debug_locks && p->lockdep_depth);
|
|
#endif
|
|
|
|
p->saved_state = p->__state;
|
|
WRITE_ONCE(p->__state, TASK_FROZEN);
|
|
return TASK_FROZEN;
|
|
}
|
|
|
|
static bool __freeze_task(struct task_struct *p)
|
|
{
|
|
/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
|
|
return task_call_func(p, __set_task_frozen, NULL);
|
|
}
|
|
|
|
/**
|
|
* freeze_task - send a freeze request to given task
|
|
* @p: task to send the request to
|
|
*
|
|
* If @p is freezing, the freeze request is sent either by sending a fake
|
|
* signal (if it's not a kernel thread) or waking it up (if it's a kernel
|
|
* thread).
|
|
*
|
|
* RETURNS:
|
|
* %false, if @p is not freezing or already frozen; %true, otherwise
|
|
*/
|
|
bool freeze_task(struct task_struct *p)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&freezer_lock, flags);
|
|
if (!freezing(p) || frozen(p) || __freeze_task(p)) {
|
|
spin_unlock_irqrestore(&freezer_lock, flags);
|
|
return false;
|
|
}
|
|
|
|
if (!(p->flags & PF_KTHREAD))
|
|
fake_signal_wake_up(p);
|
|
else
|
|
wake_up_state(p, TASK_NORMAL);
|
|
|
|
spin_unlock_irqrestore(&freezer_lock, flags);
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Restore the saved_state before the task entered freezer. For typical task
|
|
* in the __refrigerator(), saved_state == TASK_RUNNING so nothing happens
|
|
* here. For tasks which were TASK_NORMAL | TASK_FREEZABLE, their initial state
|
|
* is restored unless they got an expected wakeup (see ttwu_state_match()).
|
|
* Returns 1 if the task state was restored.
|
|
*/
|
|
static int __restore_freezer_state(struct task_struct *p, void *arg)
|
|
{
|
|
unsigned int state = p->saved_state;
|
|
|
|
if (state != TASK_RUNNING) {
|
|
WRITE_ONCE(p->__state, state);
|
|
p->saved_state = TASK_RUNNING;
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void __thaw_task(struct task_struct *p)
|
|
{
|
|
guard(spinlock_irqsave)(&freezer_lock);
|
|
if (frozen(p) && !task_call_func(p, __restore_freezer_state, NULL))
|
|
wake_up_state(p, TASK_FROZEN);
|
|
}
|
|
|
|
/*
|
|
* thaw_process - Thaw a frozen process
|
|
* @p: the process to be thawed
|
|
*
|
|
* Iterate over all threads of @p and call __thaw_task() on each.
|
|
*/
|
|
void thaw_process(struct task_struct *p)
|
|
{
|
|
struct task_struct *t;
|
|
|
|
rcu_read_lock();
|
|
for_each_thread(p, t) {
|
|
__thaw_task(t);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/**
|
|
* set_freezable - make %current freezable
|
|
*
|
|
* Mark %current freezable and enter refrigerator if necessary.
|
|
*/
|
|
bool set_freezable(void)
|
|
{
|
|
might_sleep();
|
|
|
|
/*
|
|
* Modify flags while holding freezer_lock. This ensures the
|
|
* freezer notices that we aren't frozen yet or the freezing
|
|
* condition is visible to try_to_freeze() below.
|
|
*/
|
|
spin_lock_irq(&freezer_lock);
|
|
current->flags &= ~PF_NOFREEZE;
|
|
spin_unlock_irq(&freezer_lock);
|
|
|
|
return try_to_freeze();
|
|
}
|
|
EXPORT_SYMBOL(set_freezable);
|