mm: mmap_lock: use local locks instead of disabling preemption

mmap_lock explicitly disables/enables preemption while manipulating its
local CPU variables.  This is to be expected, but in this case it doesn't
play well with PREEMPT_RT: the preemption-disabled code section also takes
a spin-lock, and spin-locks on RT systems will try to schedule, which is
exactly what we're trying to avoid.

To mitigate this, convert the explicit preemption handling to local_locks,
which are RT-aware and disable migration instead of preemption when
PREEMPT_RT=y.

The faulty call trace looks like the following:
    __mmap_lock_do_trace_*()
      preempt_disable()
      get_mm_memcg_path()
        cgroup_path()
          kernfs_path_from_node()
            spin_lock_irqsave() /* Scheduling while atomic! */
Link: https://lkml.kernel.org/r/20210604163506.2103900-1-nsaenzju@redhat.com
Fixes: 2b5067a814 ("mm: mmap_lock: add tracepoints around lock acquisition")
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Tested-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
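
For illustration only (this sketch is not part of the patch; the struct,
variable and function names below are hypothetical): the general pattern being
adopted is to guard per-CPU scratch data with a local_lock_t rather than with
preempt_disable()/preempt_enable().  On !PREEMPT_RT, local_lock() still boils
down to disabling preemption; on PREEMPT_RT it takes a per-CPU lock and
disables migration instead, so the section may take sleeping locks such as the
spin_lock_irqsave() in kernfs_path_from_node().

    #include <linux/kernel.h>
    #include <linux/local_lock.h>
    #include <linux/percpu.h>
    #include <linux/smp.h>

    /* Hypothetical per-CPU scratch area, analogous to struct memcg_path. */
    struct scratch_area {
            local_lock_t lock;
            char buf[64];
    };

    static DEFINE_PER_CPU(struct scratch_area, scratch_area) = {
            .lock = INIT_LOCAL_LOCK(lock),
    };

    /* Hypothetical user: fill this CPU's buffer without risking migration. */
    static void scratch_area_fill(void)
    {
            struct scratch_area *s;

            /* preempt_disable() on !RT; per-CPU lock + migrate_disable() on RT */
            local_lock(&scratch_area.lock);
            s = this_cpu_ptr(&scratch_area);  /* stable: we cannot migrate here */
            snprintf(s->buf, sizeof(s->buf), "cpu%d", smp_processor_id());
            /* On PREEMPT_RT it is now legal to take sleeping locks here. */
            local_unlock(&scratch_area.lock);
    }

The diff below applies exactly this conversion to the memcg path buffers used
by the mmap_lock tracepoints.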
			
			
This commit is contained in:
    parent 65ac1a60a5
    commit 832b507253

1 changed file with 22 additions and 11 deletions

--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -11,6 +11,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/trace_events.h>
+#include <linux/local_lock.h>
 
 EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
 EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
@@ -39,21 +40,30 @@ static int reg_refcount; /* Protected by reg_lock. */
  */
 #define CONTEXT_COUNT 4
 
-static DEFINE_PER_CPU(char __rcu *, memcg_path_buf);
+struct memcg_path {
+	local_lock_t lock;
+	char __rcu *buf;
+	local_t buf_idx;
+};
+static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+	.buf_idx = LOCAL_INIT(0),
+};
+
 static char **tmp_bufs;
-static DEFINE_PER_CPU(int, memcg_path_buf_idx);
 
 /* Called with reg_lock held. */
 static void free_memcg_path_bufs(void)
 {
+	struct memcg_path *memcg_path;
 	int cpu;
 	char **old = tmp_bufs;
 
 	for_each_possible_cpu(cpu) {
-		*(old++) = rcu_dereference_protected(
-			per_cpu(memcg_path_buf, cpu),
+		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
+		*(old++) = rcu_dereference_protected(memcg_path->buf,
 			lockdep_is_held(&reg_lock));
-		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), NULL);
+		rcu_assign_pointer(memcg_path->buf, NULL);
 	}
 
 	/* Wait for inflight memcg_path_buf users to finish. */
@@ -88,7 +98,7 @@ int trace_mmap_lock_reg(void)
 		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
 		if (new == NULL)
 			goto out_fail_free;
-		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), new);
+		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
 		/* Don't need to wait for inflights, they'd have gotten NULL. */
 	}
 
@@ -122,23 +132,24 @@ void trace_mmap_lock_unreg(void)
 
 static inline char *get_memcg_path_buf(void)
 {
+	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
 	char *buf;
 	int idx;
 
 	rcu_read_lock();
-	buf = rcu_dereference(*this_cpu_ptr(&memcg_path_buf));
+	buf = rcu_dereference(memcg_path->buf);
 	if (buf == NULL) {
 		rcu_read_unlock();
 		return NULL;
 	}
-	idx = this_cpu_add_return(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE) -
+	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
 	      MEMCG_PATH_BUF_SIZE;
 	return &buf[idx];
 }
 
 static inline void put_memcg_path_buf(void)
 {
-	this_cpu_sub(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE);
+	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
 	rcu_read_unlock();
 }
 
@@ -179,14 +190,14 @@ static const char *get_mm_memcg_path(struct mm_struct *mm)
 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
 	do {                                                                   \
 		const char *memcg_path;                                        \
-		preempt_disable();                                             \
+		local_lock(&memcg_paths.lock);				       \
 		memcg_path = get_mm_memcg_path(mm);                            \
 		trace_mmap_lock_##type(mm,                                     \
 				       memcg_path != NULL ? memcg_path : "",   \
 				       ##__VA_ARGS__);                         \
 		if (likely(memcg_path != NULL))                                \
 			put_memcg_path_buf();                                  \
-		preempt_enable();                                              \
+		local_unlock(&memcg_paths.lock);			       \
 	} while (0)
 
 #else /* !CONFIG_MEMCG */
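
A side note on the buf_idx handling above: each get_memcg_path_buf() claims the
next MEMCG_PATH_BUF_SIZE-sized slice of the shared per-CPU buffer and each
put_memcg_path_buf() returns it, so up to CONTEXT_COUNT strictly nested
contexts (task, softirq, irq, NMI) never overwrite each other's slice.  A
minimal sketch of just that slot logic (hypothetical names, a single global
buffer instead of a per-CPU one, relying on local_t operations being
interrupt- and NMI-safe on the local CPU):

    #include <asm/local.h>

    #define SLICE_SIZE   256   /* stands in for MEMCG_PATH_BUF_SIZE */
    #define NR_CONTEXTS  4     /* stands in for CONTEXT_COUNT       */

    static char slice_buf[SLICE_SIZE * NR_CONTEXTS];  /* hypothetical buffer */
    static local_t slice_idx = LOCAL_INIT(0);         /* hypothetical index  */

    /* Claim the next free slice; a nested context that interrupts us sees the
     * already-bumped index and takes the following slice, not ours. */
    static char *slice_get(void)
    {
            long idx = local_add_return(SLICE_SIZE, &slice_idx) - SLICE_SIZE;

            return &slice_buf[idx];
    }

    /* Release the most recently claimed slice (usage must nest strictly). */
    static void slice_put(void)
    {
            local_sub(SLICE_SIZE, &slice_idx);
    }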