linux/kernel/freezer.c
zhongjinji 59d4d36158 mm/oom_kill: thaw the entire OOM victim process
Patch series "Improvements to Victim Process Thawing and OOM Reaper
Traversal Order", v10.

This patch series focuses on optimizing victim process thawing and
refining the traversal order of the OOM reaper.  Since __thaw_task()
thaws only a single thread of the victim, it cannot guarantee that a
frozen OOM victim exits.  Patch 1 thaws the entire OOM victim process
to ensure that victims are able to terminate themselves.  Even if the
oom_reaper is delayed, patch 2 is still beneficial for reaping processes
with a large address space footprint, and it also greatly improves
process_mrelease().


This patch (of 2):

The OOM killer is a mechanism that selects and kills processes when the
system runs out of memory, in order to reclaim resources and keep the
system stable.  But an OOM victim cannot terminate on its own while it
is frozen, even if it is thawed through __thaw_task().  This is because
__thaw_task() thaws only a single thread of the victim, not the entire
process.

In addition, freezing_slow_path() decides whether a task is an OOM
victim by checking the task's TIF_MEMDIE flag.  When a task is
identified as an OOM victim, the freezer bypasses both PM freezing and
cgroup freezing so that the task is not kept frozen.

Historically, TIF_MEMDIE was a "this is the oom victim & it has access
to memory reserves" flag.  It had thread vs. process problems, and
tsk_is_oom_victim() was introduced later to get rid of them and other
issues, as well as to guarantee that the oom victim's mm can be
identified reliably, e.g. for the oom_reaper.
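
As an illustration (a sketch, not the literal diff from this series),
the victim check in freezing_slow_path() changes from the per-thread
flag to the per-process test, which holds for every thread of the
victim:

	/* before: per-thread flag, set on only one thread of the victim */
	if (test_tsk_thread_flag(p, TIF_MEMDIE))
		return false;

	/* after: per-process test via p->signal->oom_mm */
	if (tsk_is_oom_victim(p))
		return false;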

Therefore, thaw_process() is introduced to unfreeze all threads within the
OOM victim process, ensuring that every thread is properly thawed.  The
freezer now uses tsk_is_oom_victim() to determine OOM victim status,
allowing all victim threads to be unfrozen as necessary.

With this change, the entire OOM victim process will be thawed when an OOM
event occurs, ensuring that the victim can terminate on its own.
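
For context, the call site that thaws the victim changes along these
lines (a minimal sketch, assuming, as in current kernels, that the thaw
happens in mark_oom_victim() in mm/oom_kill.c):

	static void mark_oom_victim(struct task_struct *tsk)
	{
		...
		/* was: __thaw_task(tsk), which wakes only this one thread */
		thaw_process(tsk);	/* wake every thread of the victim */
		...
	}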

Link: https://lkml.kernel.org/r/20250915162946.5515-1-zhongjinji@honor.com
Link: https://lkml.kernel.org/r/20250915162946.5515-2-zhongjinji@honor.com
Signed-off-by: zhongjinji <zhongjinji@honor.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-09-21 14:22:35 -07:00

// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/freezer.c - Function to freeze a process
 *
 * Originally from kernel/power/process.c
 */

#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
#include <linux/oom.h>
#include <linux/kthread.h>

/* total number of freezing conditions in effect */
DEFINE_STATIC_KEY_FALSE(freezer_active);
EXPORT_SYMBOL(freezer_active);

/*
 * indicate whether PM freezing is in effect, protected by
 * system_transition_mutex
 */
bool pm_freezing;
bool pm_nosig_freezing;

/* protects freezing and frozen transitions */
static DEFINE_SPINLOCK(freezer_lock);

/**
 * freezing_slow_path - slow path for testing whether a task needs to be frozen
 * @p: task to be tested
 *
 * This function is called by freezing() if freezer_active isn't zero
 * and tests whether @p needs to enter and stay in frozen state.  Can be
 * called under any context.  The freezers are responsible for ensuring the
 * target tasks see the updated state.
 */
bool freezing_slow_path(struct task_struct *p)
{
	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
		return false;

	if (tsk_is_oom_victim(p))
		return false;

	if (pm_nosig_freezing || cgroup_freezing(p))
		return true;

	if (pm_freezing && !(p->flags & PF_KTHREAD))
		return true;

	return false;
}
EXPORT_SYMBOL(freezing_slow_path);
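
/* Has @p actually entered the TASK_FROZEN state? */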
bool frozen(struct task_struct *p)
{
	return READ_ONCE(p->__state) & TASK_FROZEN;
}

/* Refrigerator is place where frozen processes are stored :-). */
bool __refrigerator(bool check_kthr_stop)
{
	unsigned int state = get_current_state();
	bool was_frozen = false;

	pr_debug("%s entered refrigerator\n", current->comm);

	WARN_ON_ONCE(state && !(state & TASK_NORMAL));

	for (;;) {
		bool freeze;

		raw_spin_lock_irq(&current->pi_lock);
		WRITE_ONCE(current->__state, TASK_FROZEN);
		/* unstale saved_state so that __thaw_task() will wake us up */
		current->saved_state = TASK_RUNNING;
		raw_spin_unlock_irq(&current->pi_lock);

		spin_lock_irq(&freezer_lock);
		freeze = freezing(current) && !(check_kthr_stop && kthread_should_stop());
		spin_unlock_irq(&freezer_lock);

		if (!freeze)
			break;

		was_frozen = true;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	pr_debug("%s left refrigerator\n", current->comm);

	return was_frozen;
}
EXPORT_SYMBOL(__refrigerator);
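
/*
 * Kick a user task into its signal delivery path without queueing a real
 * signal, so that it notices the freezing condition and calls
 * try_to_freeze() on its way back to user space.
 */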
static void fake_signal_wake_up(struct task_struct *p)
{
	unsigned long flags;

	if (lock_task_sighand(p, &flags)) {
		signal_wake_up(p, 0);
		unlock_task_sighand(p, &flags);
	}
}

static int __set_task_frozen(struct task_struct *p, void *arg)
{
	unsigned int state = READ_ONCE(p->__state);

	/*
	 * Allow freezing the sched_delayed tasks; they will not execute until
	 * ttwu() fixes them up, so it is safe to swap their state now, instead
	 * of waiting for them to get fully dequeued.
	 */
	if (task_is_runnable(p))
		return 0;

	if (p != current && task_curr(p))
		return 0;

	if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
		return 0;

	/*
	 * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
	 * can suffer spurious wakeups.
	 */
	if (state & TASK_FREEZABLE)
		WARN_ON_ONCE(!(state & TASK_NORMAL));

#ifdef CONFIG_LOCKDEP
	/*
	 * It's dangerous to freeze with locks held; there be dragons there.
	 */
	if (!(state & __TASK_FREEZABLE_UNSAFE))
		WARN_ON_ONCE(debug_locks && p->lockdep_depth);
#endif

	p->saved_state = p->__state;
	WRITE_ONCE(p->__state, TASK_FROZEN);
	return TASK_FROZEN;
}

static bool __freeze_task(struct task_struct *p)
{
	/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
	return task_call_func(p, __set_task_frozen, NULL);
}

/**
 * freeze_task - send a freeze request to given task
 * @p: task to send the request to
 *
 * If @p is freezing, the freeze request is sent either by sending a fake
 * signal (if it's not a kernel thread) or waking it up (if it's a kernel
 * thread).
 *
 * RETURNS:
 * %false, if @p is not freezing or already frozen; %true, otherwise
 */
bool freeze_task(struct task_struct *p)
{
	unsigned long flags;

	spin_lock_irqsave(&freezer_lock, flags);
	if (!freezing(p) || frozen(p) || __freeze_task(p)) {
		spin_unlock_irqrestore(&freezer_lock, flags);
		return false;
	}

	if (!(p->flags & PF_KTHREAD))
		fake_signal_wake_up(p);
	else
		wake_up_state(p, TASK_NORMAL);

	spin_unlock_irqrestore(&freezer_lock, flags);
	return true;
}

/*
 * Restore the saved_state before the task entered freezer. For typical task
 * in the __refrigerator(), saved_state == TASK_RUNNING so nothing happens
 * here. For tasks which were TASK_NORMAL | TASK_FREEZABLE, their initial state
 * is restored unless they got an expected wakeup (see ttwu_state_match()).
 * Returns 1 if the task state was restored.
 */
static int __restore_freezer_state(struct task_struct *p, void *arg)
{
	unsigned int state = p->saved_state;

	if (state != TASK_RUNNING) {
		WRITE_ONCE(p->__state, state);
		p->saved_state = TASK_RUNNING;
		return 1;
	}

	return 0;
}
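
/*
 * Wake a single frozen thread and restore the state it had before freezing.
 * Note that this thaws only one thread; thaw_process() below walks every
 * thread of a process.
 */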
void __thaw_task(struct task_struct *p)
{
	guard(spinlock_irqsave)(&freezer_lock);
	if (frozen(p) && !task_call_func(p, __restore_freezer_state, NULL))
		wake_up_state(p, TASK_FROZEN);
}

/*
 * thaw_process - Thaw a frozen process
 * @p: the process to be thawed
 *
 * Iterate over all threads of @p and call __thaw_task() on each.
 */
void thaw_process(struct task_struct *p)
{
	struct task_struct *t;

	rcu_read_lock();
	for_each_thread(p, t)
		__thaw_task(t);
	rcu_read_unlock();
}

/**
 * set_freezable - make %current freezable
 *
 * Mark %current freezable and enter refrigerator if necessary.
 */
bool set_freezable(void)
{
	might_sleep();

	/*
	 * Modify flags while holding freezer_lock.  This ensures the
	 * freezer notices that we aren't frozen yet or the freezing
	 * condition is visible to try_to_freeze() below.
	 */
	spin_lock_irq(&freezer_lock);
	current->flags &= ~PF_NOFREEZE;
	spin_unlock_irq(&freezer_lock);

	return try_to_freeze();
}
EXPORT_SYMBOL(set_freezable);
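
For reference, this is the typical pattern for a freezable kernel thread
built on set_freezable() and try_to_freeze(); the sketch below is
illustrative only (example_thread() and its work loop are hypothetical,
not part of this file):

	static int example_thread(void *data)
	{
		set_freezable();	/* clear PF_NOFREEZE for this kthread */

		while (!kthread_should_stop()) {
			/*
			 * Park in __refrigerator() whenever a freezing
			 * condition is in effect, then resume work.
			 */
			try_to_freeze();
			/* ... do one unit of work ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}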