mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	padata: allocate work structures for parallel jobs from a pool
padata allocates per-CPU, per-instance work structs for parallel jobs.  A
do_parallel call assigns a job to a sequence number and hashes the number
to a CPU, where the job will eventually run using the corresponding work.
This approach fit with how padata used to bind a job to each CPU
round-robin, but makes less sense after commit bfde23ce20 ("padata: unbind
parallel jobs from specific CPUs") because a work isn't bound to a
particular CPU anymore, and isn't needed at all for multithreaded jobs
because they don't have sequence numbers.
Replace the per-CPU works with a preallocated pool, which allows sharing
them between existing padata users and the upcoming multithreaded user.
The pool will also facilitate setting NUMA-aware concurrency limits with
later users.
The pool is sized according to the number of possible CPUs.  With this
limit, MAX_OBJ_NUM no longer makes sense, so remove it.
If the global pool is exhausted, a parallel job is run in the current task
instead to throttle a system trying to do too much in parallel.
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Josh Triplett <josh@joshtriplett.org>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Shile Zhang <shile.zhang@linux.alibaba.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zi Yan <ziy@nvidia.com>
Link: http://lkml.kernel.org/r/20200527173608.2885243-4-daniel.m.jordan@oracle.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									f1b192b117
								
							
						
					
					
						commit
						4611ce2246
					
				
					 2 changed files with 82 additions and 52 deletions
				
			
		| 
						 | 
					@ -24,7 +24,6 @@
 | 
				
			||||||
 * @list: List entry, to attach to the padata lists.
 | 
					 * @list: List entry, to attach to the padata lists.
 | 
				
			||||||
 * @pd: Pointer to the internal control structure.
 | 
					 * @pd: Pointer to the internal control structure.
 | 
				
			||||||
 * @cb_cpu: Callback cpu for serialization.
 | 
					 * @cb_cpu: Callback cpu for serialization.
 | 
				
			||||||
 * @cpu: Cpu for parallelization.
 | 
					 | 
				
			||||||
 * @seq_nr: Sequence number of the parallelized data object.
 | 
					 * @seq_nr: Sequence number of the parallelized data object.
 | 
				
			||||||
 * @info: Used to pass information from the parallel to the serial function.
 | 
					 * @info: Used to pass information from the parallel to the serial function.
 | 
				
			||||||
 * @parallel: Parallel execution function.
 | 
					 * @parallel: Parallel execution function.
 | 
				
			||||||
| 
						 | 
					@ -34,7 +33,6 @@ struct padata_priv {
 | 
				
			||||||
	struct list_head	list;
 | 
						struct list_head	list;
 | 
				
			||||||
	struct parallel_data	*pd;
 | 
						struct parallel_data	*pd;
 | 
				
			||||||
	int			cb_cpu;
 | 
						int			cb_cpu;
 | 
				
			||||||
	int			cpu;
 | 
					 | 
				
			||||||
	unsigned int		seq_nr;
 | 
						unsigned int		seq_nr;
 | 
				
			||||||
	int			info;
 | 
						int			info;
 | 
				
			||||||
	void                    (*parallel)(struct padata_priv *padata);
 | 
						void                    (*parallel)(struct padata_priv *padata);
 | 
				
			||||||
| 
						 | 
					@ -68,15 +66,11 @@ struct padata_serial_queue {
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * struct padata_parallel_queue - The percpu padata parallel queue
 | 
					 * struct padata_parallel_queue - The percpu padata parallel queue
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * @parallel: List to wait for parallelization.
 | 
					 | 
				
			||||||
 * @reorder: List to wait for reordering after parallel processing.
 | 
					 * @reorder: List to wait for reordering after parallel processing.
 | 
				
			||||||
 * @work: work struct for parallelization.
 | 
					 | 
				
			||||||
 * @num_obj: Number of objects that are processed by this cpu.
 | 
					 * @num_obj: Number of objects that are processed by this cpu.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
struct padata_parallel_queue {
 | 
					struct padata_parallel_queue {
 | 
				
			||||||
       struct padata_list    parallel;
 | 
					 | 
				
			||||||
       struct padata_list    reorder;
 | 
					       struct padata_list    reorder;
 | 
				
			||||||
       struct work_struct    work;
 | 
					 | 
				
			||||||
       atomic_t              num_obj;
 | 
					       atomic_t              num_obj;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -111,7 +105,7 @@ struct parallel_data {
 | 
				
			||||||
	struct padata_parallel_queue	__percpu *pqueue;
 | 
						struct padata_parallel_queue	__percpu *pqueue;
 | 
				
			||||||
	struct padata_serial_queue	__percpu *squeue;
 | 
						struct padata_serial_queue	__percpu *squeue;
 | 
				
			||||||
	atomic_t			refcnt;
 | 
						atomic_t			refcnt;
 | 
				
			||||||
	atomic_t			seq_nr;
 | 
						unsigned int			seq_nr;
 | 
				
			||||||
	unsigned int			processed;
 | 
						unsigned int			processed;
 | 
				
			||||||
	int				cpu;
 | 
						int				cpu;
 | 
				
			||||||
	struct padata_cpumask		cpumask;
 | 
						struct padata_cpumask		cpumask;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										122
									
								
								kernel/padata.c
									
									
									
									
									
								
							
							
						
						
									
										122
									
								
								kernel/padata.c
									
									
									
									
									
								
							| 
						 | 
					@ -32,7 +32,15 @@
 | 
				
			||||||
#include <linux/sysfs.h>
 | 
					#include <linux/sysfs.h>
 | 
				
			||||||
#include <linux/rcupdate.h>
 | 
					#include <linux/rcupdate.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MAX_OBJ_NUM 1000
 | 
					struct padata_work {
 | 
				
			||||||
 | 
						struct work_struct	pw_work;
 | 
				
			||||||
 | 
						struct list_head	pw_list;  /* padata_free_works linkage */
 | 
				
			||||||
 | 
						void			*pw_data;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static DEFINE_SPINLOCK(padata_works_lock);
 | 
				
			||||||
 | 
					static struct padata_work *padata_works;
 | 
				
			||||||
 | 
					static LIST_HEAD(padata_free_works);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void padata_free_pd(struct parallel_data *pd);
 | 
					static void padata_free_pd(struct parallel_data *pd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,30 +66,44 @@ static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 | 
				
			||||||
	return padata_index_to_cpu(pd, cpu_index);
 | 
						return padata_index_to_cpu(pd, cpu_index);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void padata_parallel_worker(struct work_struct *parallel_work)
 | 
					static struct padata_work *padata_work_alloc(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct padata_parallel_queue *pqueue;
 | 
						struct padata_work *pw;
 | 
				
			||||||
	LIST_HEAD(local_list);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	local_bh_disable();
 | 
						lockdep_assert_held(&padata_works_lock);
 | 
				
			||||||
	pqueue = container_of(parallel_work,
 | 
					 | 
				
			||||||
			      struct padata_parallel_queue, work);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&pqueue->parallel.lock);
 | 
						if (list_empty(&padata_free_works))
 | 
				
			||||||
	list_replace_init(&pqueue->parallel.list, &local_list);
 | 
							return NULL;	/* No more work items allowed to be queued. */
 | 
				
			||||||
	spin_unlock(&pqueue->parallel.lock);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (!list_empty(&local_list)) {
 | 
						pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
 | 
				
			||||||
		struct padata_priv *padata;
 | 
						list_del(&pw->pw_list);
 | 
				
			||||||
 | 
						return pw;
 | 
				
			||||||
		padata = list_entry(local_list.next,
 | 
					 | 
				
			||||||
				    struct padata_priv, list);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		list_del_init(&padata->list);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		padata->parallel(padata);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
 | 
				
			||||||
 | 
								     void *data)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						INIT_WORK(&pw->pw_work, work_fn);
 | 
				
			||||||
 | 
						pw->pw_data = data;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void padata_work_free(struct padata_work *pw)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						lockdep_assert_held(&padata_works_lock);
 | 
				
			||||||
 | 
						list_add(&pw->pw_list, &padata_free_works);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void padata_parallel_worker(struct work_struct *parallel_work)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct padata_work *pw = container_of(parallel_work, struct padata_work,
 | 
				
			||||||
 | 
										      pw_work);
 | 
				
			||||||
 | 
						struct padata_priv *padata = pw->pw_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						local_bh_disable();
 | 
				
			||||||
 | 
						padata->parallel(padata);
 | 
				
			||||||
 | 
						spin_lock(&padata_works_lock);
 | 
				
			||||||
 | 
						padata_work_free(pw);
 | 
				
			||||||
 | 
						spin_unlock(&padata_works_lock);
 | 
				
			||||||
	local_bh_enable();
 | 
						local_bh_enable();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -105,9 +127,9 @@ int padata_do_parallel(struct padata_shell *ps,
 | 
				
			||||||
		       struct padata_priv *padata, int *cb_cpu)
 | 
							       struct padata_priv *padata, int *cb_cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct padata_instance *pinst = ps->pinst;
 | 
						struct padata_instance *pinst = ps->pinst;
 | 
				
			||||||
	int i, cpu, cpu_index, target_cpu, err;
 | 
						int i, cpu, cpu_index, err;
 | 
				
			||||||
	struct padata_parallel_queue *queue;
 | 
					 | 
				
			||||||
	struct parallel_data *pd;
 | 
						struct parallel_data *pd;
 | 
				
			||||||
 | 
						struct padata_work *pw;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rcu_read_lock_bh();
 | 
						rcu_read_lock_bh();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -135,25 +157,25 @@ int padata_do_parallel(struct padata_shell *ps,
 | 
				
			||||||
	if ((pinst->flags & PADATA_RESET))
 | 
						if ((pinst->flags & PADATA_RESET))
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	err = 0;
 | 
					 | 
				
			||||||
	atomic_inc(&pd->refcnt);
 | 
						atomic_inc(&pd->refcnt);
 | 
				
			||||||
	padata->pd = pd;
 | 
						padata->pd = pd;
 | 
				
			||||||
	padata->cb_cpu = *cb_cpu;
 | 
						padata->cb_cpu = *cb_cpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
 | 
						rcu_read_unlock_bh();
 | 
				
			||||||
	target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 | 
					 | 
				
			||||||
	padata->cpu = target_cpu;
 | 
					 | 
				
			||||||
	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&queue->parallel.lock);
 | 
						spin_lock(&padata_works_lock);
 | 
				
			||||||
	list_add_tail(&padata->list, &queue->parallel.list);
 | 
						padata->seq_nr = ++pd->seq_nr;
 | 
				
			||||||
	spin_unlock(&queue->parallel.lock);
 | 
						pw = padata_work_alloc();
 | 
				
			||||||
 | 
						spin_unlock(&padata_works_lock);
 | 
				
			||||||
	queue_work(pinst->parallel_wq, &queue->work);
 | 
						if (pw) {
 | 
				
			||||||
 | 
							padata_work_init(pw, padata_parallel_worker, padata);
 | 
				
			||||||
 | 
							queue_work(pinst->parallel_wq, &pw->pw_work);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							/* Maximum works limit exceeded, run in the current task. */
 | 
				
			||||||
 | 
							padata->parallel(padata);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	rcu_read_unlock_bh();
 | 
						rcu_read_unlock_bh();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -324,8 +346,9 @@ static void padata_serial_worker(struct work_struct *serial_work)
 | 
				
			||||||
void padata_do_serial(struct padata_priv *padata)
 | 
					void padata_do_serial(struct padata_priv *padata)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct parallel_data *pd = padata->pd;
 | 
						struct parallel_data *pd = padata->pd;
 | 
				
			||||||
 | 
						int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
 | 
				
			||||||
	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 | 
						struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 | 
				
			||||||
							   padata->cpu);
 | 
												   hashed_cpu);
 | 
				
			||||||
	struct padata_priv *cur;
 | 
						struct padata_priv *cur;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&pqueue->reorder.lock);
 | 
						spin_lock(&pqueue->reorder.lock);
 | 
				
			||||||
| 
						 | 
					@ -416,8 +439,6 @@ static void padata_init_pqueues(struct parallel_data *pd)
 | 
				
			||||||
		pqueue = per_cpu_ptr(pd->pqueue, cpu);
 | 
							pqueue = per_cpu_ptr(pd->pqueue, cpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		__padata_list_init(&pqueue->reorder);
 | 
							__padata_list_init(&pqueue->reorder);
 | 
				
			||||||
		__padata_list_init(&pqueue->parallel);
 | 
					 | 
				
			||||||
		INIT_WORK(&pqueue->work, padata_parallel_worker);
 | 
					 | 
				
			||||||
		atomic_set(&pqueue->num_obj, 0);
 | 
							atomic_set(&pqueue->num_obj, 0);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -451,7 +472,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	padata_init_pqueues(pd);
 | 
						padata_init_pqueues(pd);
 | 
				
			||||||
	padata_init_squeues(pd);
 | 
						padata_init_squeues(pd);
 | 
				
			||||||
	atomic_set(&pd->seq_nr, -1);
 | 
						pd->seq_nr = -1;
 | 
				
			||||||
	atomic_set(&pd->refcnt, 1);
 | 
						atomic_set(&pd->refcnt, 1);
 | 
				
			||||||
	spin_lock_init(&pd->lock);
 | 
						spin_lock_init(&pd->lock);
 | 
				
			||||||
	pd->cpu = cpumask_first(pd->cpumask.pcpu);
 | 
						pd->cpu = cpumask_first(pd->cpumask.pcpu);
 | 
				
			||||||
| 
						 | 
					@ -1053,6 +1074,7 @@ EXPORT_SYMBOL(padata_free_shell);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void __init padata_init(void)
 | 
					void __init padata_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						unsigned int i, possible_cpus;
 | 
				
			||||||
#ifdef CONFIG_HOTPLUG_CPU
 | 
					#ifdef CONFIG_HOTPLUG_CPU
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1064,13 +1086,27 @@ void __init padata_init(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
 | 
						ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
 | 
				
			||||||
				      NULL, padata_cpu_dead);
 | 
									      NULL, padata_cpu_dead);
 | 
				
			||||||
	if (ret < 0) {
 | 
						if (ret < 0)
 | 
				
			||||||
		cpuhp_remove_multi_state(hp_online);
 | 
							goto remove_online_state;
 | 
				
			||||||
		goto err;
 | 
					#endif
 | 
				
			||||||
	}
 | 
					
 | 
				
			||||||
 | 
						possible_cpus = num_possible_cpus();
 | 
				
			||||||
 | 
						padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
 | 
				
			||||||
 | 
									     GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!padata_works)
 | 
				
			||||||
 | 
							goto remove_dead_state;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < possible_cpus; ++i)
 | 
				
			||||||
 | 
							list_add(&padata_works[i].pw_list, &padata_free_works);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return;
 | 
						return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					remove_dead_state:
 | 
				
			||||||
 | 
					#ifdef CONFIG_HOTPLUG_CPU
 | 
				
			||||||
 | 
						cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
 | 
				
			||||||
 | 
					remove_online_state:
 | 
				
			||||||
 | 
						cpuhp_remove_multi_state(hp_online);
 | 
				
			||||||
err:
 | 
					err:
 | 
				
			||||||
	pr_warn("padata: initialization failed\n");
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
						pr_warn("padata: initialization failed\n");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue