mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	bpf: Optimize program stats
Move bpf_prog_stats from prog->aux into prog to avoid one extra load in critical path of program execution.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210210033634.62081-2-alexei.starovoitov@gmail.com
This commit is contained in:
		
							parent
							
								
									6df8fb8330
								
							
						
					
					
						commit
						700d4796ef
					
				
					 6 changed files with 18 additions and 18 deletions
				
			
		| 
						 | 
					@ -14,7 +14,6 @@
 | 
				
			||||||
#include <linux/numa.h>
 | 
					#include <linux/numa.h>
 | 
				
			||||||
#include <linux/mm_types.h>
 | 
					#include <linux/mm_types.h>
 | 
				
			||||||
#include <linux/wait.h>
 | 
					#include <linux/wait.h>
 | 
				
			||||||
#include <linux/u64_stats_sync.h>
 | 
					 | 
				
			||||||
#include <linux/refcount.h>
 | 
					#include <linux/refcount.h>
 | 
				
			||||||
#include <linux/mutex.h>
 | 
					#include <linux/mutex.h>
 | 
				
			||||||
#include <linux/module.h>
 | 
					#include <linux/module.h>
 | 
				
			||||||
| 
						 | 
					@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define MAX_BPF_FUNC_ARGS 12
 | 
					#define MAX_BPF_FUNC_ARGS 12
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct bpf_prog_stats {
 | 
					 | 
				
			||||||
	u64 cnt;
 | 
					 | 
				
			||||||
	u64 nsecs;
 | 
					 | 
				
			||||||
	struct u64_stats_sync syncp;
 | 
					 | 
				
			||||||
} __aligned(2 * sizeof(u64));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct btf_func_model {
 | 
					struct btf_func_model {
 | 
				
			||||||
	u8 ret_size;
 | 
						u8 ret_size;
 | 
				
			||||||
	u8 nr_args;
 | 
						u8 nr_args;
 | 
				
			||||||
| 
						 | 
					@ -845,7 +838,6 @@ struct bpf_prog_aux {
 | 
				
			||||||
	u32 linfo_idx;
 | 
						u32 linfo_idx;
 | 
				
			||||||
	u32 num_exentries;
 | 
						u32 num_exentries;
 | 
				
			||||||
	struct exception_table_entry *extable;
 | 
						struct exception_table_entry *extable;
 | 
				
			||||||
	struct bpf_prog_stats __percpu *stats;
 | 
					 | 
				
			||||||
	union {
 | 
						union {
 | 
				
			||||||
		struct work_struct work;
 | 
							struct work_struct work;
 | 
				
			||||||
		struct rcu_head	rcu;
 | 
							struct rcu_head	rcu;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -22,6 +22,7 @@
 | 
				
			||||||
#include <linux/vmalloc.h>
 | 
					#include <linux/vmalloc.h>
 | 
				
			||||||
#include <linux/sockptr.h>
 | 
					#include <linux/sockptr.h>
 | 
				
			||||||
#include <crypto/sha1.h>
 | 
					#include <crypto/sha1.h>
 | 
				
			||||||
 | 
					#include <linux/u64_stats_sync.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <net/sch_generic.h>
 | 
					#include <net/sch_generic.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -539,6 +540,12 @@ struct bpf_binary_header {
 | 
				
			||||||
	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 | 
						u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct bpf_prog_stats {
 | 
				
			||||||
 | 
						u64 cnt;
 | 
				
			||||||
 | 
						u64 nsecs;
 | 
				
			||||||
 | 
						struct u64_stats_sync syncp;
 | 
				
			||||||
 | 
					} __aligned(2 * sizeof(u64));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct bpf_prog {
 | 
					struct bpf_prog {
 | 
				
			||||||
	u16			pages;		/* Number of allocated pages */
 | 
						u16			pages;		/* Number of allocated pages */
 | 
				
			||||||
	u16			jited:1,	/* Is our filter JIT'ed? */
 | 
						u16			jited:1,	/* Is our filter JIT'ed? */
 | 
				
			||||||
| 
						 | 
					@ -557,10 +564,11 @@ struct bpf_prog {
 | 
				
			||||||
	u32			len;		/* Number of filter blocks */
 | 
						u32			len;		/* Number of filter blocks */
 | 
				
			||||||
	u32			jited_len;	/* Size of jited insns in bytes */
 | 
						u32			jited_len;	/* Size of jited insns in bytes */
 | 
				
			||||||
	u8			tag[BPF_TAG_SIZE];
 | 
						u8			tag[BPF_TAG_SIZE];
 | 
				
			||||||
	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 | 
						struct bpf_prog_stats __percpu *stats;
 | 
				
			||||||
	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 | 
					 | 
				
			||||||
	unsigned int		(*bpf_func)(const void *ctx,
 | 
						unsigned int		(*bpf_func)(const void *ctx,
 | 
				
			||||||
					    const struct bpf_insn *insn);
 | 
										    const struct bpf_insn *insn);
 | 
				
			||||||
 | 
						struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 | 
				
			||||||
 | 
						struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 | 
				
			||||||
	/* Instructions for interpreter */
 | 
						/* Instructions for interpreter */
 | 
				
			||||||
	struct sock_filter	insns[0];
 | 
						struct sock_filter	insns[0];
 | 
				
			||||||
	struct bpf_insn		insnsi[];
 | 
						struct bpf_insn		insnsi[];
 | 
				
			||||||
| 
						 | 
					@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 | 
				
			||||||
		struct bpf_prog_stats *__stats;				\
 | 
							struct bpf_prog_stats *__stats;				\
 | 
				
			||||||
		u64 __start = sched_clock();				\
 | 
							u64 __start = sched_clock();				\
 | 
				
			||||||
		__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
 | 
							__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
 | 
				
			||||||
		__stats = this_cpu_ptr(prog->aux->stats);		\
 | 
							__stats = this_cpu_ptr(prog->stats);			\
 | 
				
			||||||
		u64_stats_update_begin(&__stats->syncp);		\
 | 
							u64_stats_update_begin(&__stats->syncp);		\
 | 
				
			||||||
		__stats->cnt++;						\
 | 
							__stats->cnt++;						\
 | 
				
			||||||
		__stats->nsecs += sched_clock() - __start;		\
 | 
							__stats->nsecs += sched_clock() - __start;		\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -114,8 +114,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 | 
				
			||||||
	if (!prog)
 | 
						if (!prog)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 | 
						prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 | 
				
			||||||
	if (!prog->aux->stats) {
 | 
						if (!prog->stats) {
 | 
				
			||||||
		kfree(prog->aux);
 | 
							kfree(prog->aux);
 | 
				
			||||||
		vfree(prog);
 | 
							vfree(prog);
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
| 
						 | 
					@ -124,7 +124,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 | 
				
			||||||
	for_each_possible_cpu(cpu) {
 | 
						for_each_possible_cpu(cpu) {
 | 
				
			||||||
		struct bpf_prog_stats *pstats;
 | 
							struct bpf_prog_stats *pstats;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		pstats = per_cpu_ptr(prog->aux->stats, cpu);
 | 
							pstats = per_cpu_ptr(prog->stats, cpu);
 | 
				
			||||||
		u64_stats_init(&pstats->syncp);
 | 
							u64_stats_init(&pstats->syncp);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return prog;
 | 
						return prog;
 | 
				
			||||||
| 
						 | 
					@ -249,10 +249,10 @@ void __bpf_prog_free(struct bpf_prog *fp)
 | 
				
			||||||
	if (fp->aux) {
 | 
						if (fp->aux) {
 | 
				
			||||||
		mutex_destroy(&fp->aux->used_maps_mutex);
 | 
							mutex_destroy(&fp->aux->used_maps_mutex);
 | 
				
			||||||
		mutex_destroy(&fp->aux->dst_mutex);
 | 
							mutex_destroy(&fp->aux->dst_mutex);
 | 
				
			||||||
		free_percpu(fp->aux->stats);
 | 
					 | 
				
			||||||
		kfree(fp->aux->poke_tab);
 | 
							kfree(fp->aux->poke_tab);
 | 
				
			||||||
		kfree(fp->aux);
 | 
							kfree(fp->aux);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						free_percpu(fp->stats);
 | 
				
			||||||
	vfree(fp);
 | 
						vfree(fp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1739,7 +1739,7 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
 | 
				
			||||||
		unsigned int start;
 | 
							unsigned int start;
 | 
				
			||||||
		u64 tnsecs, tcnt;
 | 
							u64 tnsecs, tcnt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		st = per_cpu_ptr(prog->aux->stats, cpu);
 | 
							st = per_cpu_ptr(prog->stats, cpu);
 | 
				
			||||||
		do {
 | 
							do {
 | 
				
			||||||
			start = u64_stats_fetch_begin_irq(&st->syncp);
 | 
								start = u64_stats_fetch_begin_irq(&st->syncp);
 | 
				
			||||||
			tnsecs = st->nsecs;
 | 
								tnsecs = st->nsecs;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -412,7 +412,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
 | 
				
			||||||
	     * Hence check that 'start' is not zero.
 | 
						     * Hence check that 'start' is not zero.
 | 
				
			||||||
	     */
 | 
						     */
 | 
				
			||||||
	    start) {
 | 
						    start) {
 | 
				
			||||||
		stats = this_cpu_ptr(prog->aux->stats);
 | 
							stats = this_cpu_ptr(prog->stats);
 | 
				
			||||||
		u64_stats_update_begin(&stats->syncp);
 | 
							u64_stats_update_begin(&stats->syncp);
 | 
				
			||||||
		stats->cnt++;
 | 
							stats->cnt++;
 | 
				
			||||||
		stats->nsecs += sched_clock() - start;
 | 
							stats->nsecs += sched_clock() - start;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11253,7 +11253,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 | 
				
			||||||
		/* BPF_PROG_RUN doesn't call subprogs directly,
 | 
							/* BPF_PROG_RUN doesn't call subprogs directly,
 | 
				
			||||||
		 * hence main prog stats include the runtime of subprogs.
 | 
							 * hence main prog stats include the runtime of subprogs.
 | 
				
			||||||
		 * subprogs don't have IDs and not reachable via prog_get_next_id
 | 
							 * subprogs don't have IDs and not reachable via prog_get_next_id
 | 
				
			||||||
		 * func[i]->aux->stats will never be accessed and stays NULL
 | 
							 * func[i]->stats will never be accessed and stays NULL
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 | 
							func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 | 
				
			||||||
		if (!func[i])
 | 
							if (!func[i])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue