Mirror of https://github.com/torvalds/linux.git, synced 2025-11-03 18:20:25 +02:00
This commit introduces per-cpu cgroup local storage. Per-cpu cgroup local storage is very similar to simple cgroup storage (let's call it shared), except that all the data is per-cpu.

The main goal of the per-cpu variant is to implement super fast counters (e.g. packet counters), which require neither lookups nor atomic operations.

From userspace's point of view, accessing a per-cpu cgroup storage is similar to other per-cpu map types (e.g. per-cpu hashmaps and arrays). Writing to a per-cpu cgroup storage is not atomic, but is performed by copying longs, so some minimal atomicity is provided, exactly as with other per-cpu maps.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
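To make the program side of this concrete, here is a minimal sketch (not part of this patch) of a BPF program that uses the per-cpu variant as a packet counter. It assumes libbpf-style SEC() annotations and BTF map definitions; the map and function names (pkt_cnt, count_egress) are illustrative only.

/* Illustrative sketch: count egress packets per cgroup, per CPU. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} pkt_cnt SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	/* bpf_get_local_storage() returns this CPU's slot for the current
	 * cgroup, so the increment needs neither a map lookup nor atomics.
	 */
	__u64 *cnt = bpf_get_local_storage(&pkt_cnt, 0);

	(*cnt)++;
	return 1; /* allow the packet */
}

char _license[] SEC("license") = "GPL";

On the userspace side, such a map reads like any other per-cpu map: a lookup returns one value per possible CPU, and the reader sums them. A hypothetical libbpf-based reader (map_fd, cgroup_id and the BPF_CGROUP_INET_EGRESS attach type are assumptions, not taken from the patch) might look like:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/bpf.h>

static __u64 read_pkt_cnt(int map_fd, __u64 cgroup_id)
{
	struct bpf_cgroup_storage_key key = {
		.cgroup_inode_id = cgroup_id,
		.attach_type	 = BPF_CGROUP_INET_EGRESS,
	};
	int ncpus = libbpf_num_possible_cpus();
	__u64 sum = 0;

	if (ncpus < 0)
		return 0;

	__u64 values[ncpus];

	/* Per-cpu maps return an array with one entry per possible CPU. */
	if (bpf_map_lookup_elem(map_fd, &key, values))
		return 0;

	for (int i = 0; i < ncpus; i++)
		sum += values[i];
	return sum;
}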
		
			
				
	
	
		
229 lines · 5.7 KiB · C
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>

/* If kernel subsystem is allowing eBPF programs to call this function,
 * inside its own verifier_ops->get_func_proto() callback it should return
 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
 *
 * Different map implementations will rely on rcu in map methods
 * lookup/update/delete, therefore eBPF programs must run under rcu lock
 * if program is allowed to access maps, so check rcu_read_lock_held in
 * all three functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	strncpy(buf, task->comm, size);

	/* Verifier guarantees that size > 0. For task->comm exceeding
	 * size, guarantee that buf is %NUL-terminated. Unconditionally
	 * done here to save the size test.
	 */
	buf[size - 1] = 0;
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp = task_dfl_cgroup(current);

	return cgrp->kn->id.id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	void *ptr;

	storage = this_cpu_read(bpf_cgroup_storage[stype]);

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif
#endif