forked from mirrors/linux
		
	bpf: Add support for writing to nf_conn:mark
Support direct writes to nf_conn:mark from TC and XDP prog types. This is useful when applications want to store per-connection metadata. This is also particularly useful for applications that run both bpf and iptables/nftables because the latter can trivially access this metadata.

One example use case would be if a bpf prog is responsible for advanced packet classification and iptables/nftables is later used for routing due to pre-existing/legacy code.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/ebca06dea366e3e7e861c12f375a548cc4c61108.1662568410.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
		
							parent
							
								
									84c6ac417c
								
							
						
					
					
						commit
						864b656f82
					
				
					 4 changed files with 143 additions and 1 deletions
				
			
		| 
						 | 
					@ -3,13 +3,22 @@
 | 
				
			||||||
#ifndef _NF_CONNTRACK_BPF_H
 | 
					#ifndef _NF_CONNTRACK_BPF_H
 | 
				
			||||||
#define _NF_CONNTRACK_BPF_H
 | 
					#define _NF_CONNTRACK_BPF_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/bpf.h>
 | 
				
			||||||
#include <linux/btf.h>
 | 
					#include <linux/btf.h>
 | 
				
			||||||
#include <linux/kconfig.h>
 | 
					#include <linux/kconfig.h>
 | 
				
			||||||
 | 
					#include <linux/mutex.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
 | 
					#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
 | 
				
			||||||
    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
 | 
					    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern int register_nf_conntrack_bpf(void);
 | 
					extern int register_nf_conntrack_bpf(void);
 | 
				
			||||||
 | 
					extern void cleanup_nf_conntrack_bpf(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern struct mutex nf_conn_btf_access_lock;
 | 
				
			||||||
 | 
					extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
 | 
				
			||||||
 | 
							       const struct btf_type *t, int off, int size,
 | 
				
			||||||
 | 
							       enum bpf_access_type atype, u32 *next_btf_id,
 | 
				
			||||||
 | 
							       enum bpf_type_flag *flag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +27,20 @@ static inline int register_nf_conntrack_bpf(void)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Stub selected by the #else branch of the conntrack/BTF config check:
					 * there is nothing to clean up, so this does nothing.
					 */
					static inline void cleanup_nf_conntrack_bpf(void)
					{
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
 | 
				
			||||||
 | 
											 const struct btf *btf,
 | 
				
			||||||
 | 
											 const struct btf_type *t, int off,
 | 
				
			||||||
 | 
											 int size, enum bpf_access_type atype,
 | 
				
			||||||
 | 
											 u32 *next_btf_id,
 | 
				
			||||||
 | 
											 enum bpf_type_flag *flag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return -EACCES;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* _NF_CONNTRACK_BPF_H */
 | 
					#endif /* _NF_CONNTRACK_BPF_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +18,7 @@
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/atomic.h>
 | 
					#include <linux/atomic.h>
 | 
				
			||||||
 | 
					#include <linux/bpf_verifier.h>
 | 
				
			||||||
#include <linux/module.h>
 | 
					#include <linux/module.h>
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/mm.h>
 | 
					#include <linux/mm.h>
 | 
				
			||||||
| 
						 | 
					@ -8604,6 +8605,36 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 | 
				
			||||||
	return bpf_skb_is_valid_access(off, size, type, prog, info);
 | 
						return bpf_skb_is_valid_access(off, size, type, prog, info);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DEFINE_MUTEX(nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
 | 
				
			||||||
 | 
							const struct btf_type *t, int off, int size,
 | 
				
			||||||
 | 
							enum bpf_access_type atype, u32 *next_btf_id,
 | 
				
			||||||
 | 
							enum bpf_type_flag *flag);
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(nfct_bsa);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
 | 
				
			||||||
 | 
										const struct btf *btf,
 | 
				
			||||||
 | 
										const struct btf_type *t, int off,
 | 
				
			||||||
 | 
										int size, enum bpf_access_type atype,
 | 
				
			||||||
 | 
										u32 *next_btf_id,
 | 
				
			||||||
 | 
										enum bpf_type_flag *flag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int ret = -EACCES;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (atype == BPF_READ)
 | 
				
			||||||
 | 
							return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
 | 
				
			||||||
 | 
										 flag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
						if (nfct_bsa)
 | 
				
			||||||
 | 
							ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag);
 | 
				
			||||||
 | 
						mutex_unlock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool __is_valid_xdp_access(int off, int size)
 | 
					static bool __is_valid_xdp_access(int off, int size)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (off < 0 || off >= sizeof(struct xdp_md))
 | 
						if (off < 0 || off >= sizeof(struct xdp_md))
 | 
				
			||||||
| 
						 | 
					@ -8663,6 +8694,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 | 
					EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int xdp_btf_struct_access(struct bpf_verifier_log *log,
 | 
				
			||||||
 | 
									 const struct btf *btf,
 | 
				
			||||||
 | 
									 const struct btf_type *t, int off,
 | 
				
			||||||
 | 
									 int size, enum bpf_access_type atype,
 | 
				
			||||||
 | 
									 u32 *next_btf_id,
 | 
				
			||||||
 | 
									 enum bpf_type_flag *flag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int ret = -EACCES;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (atype == BPF_READ)
 | 
				
			||||||
 | 
							return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
 | 
				
			||||||
 | 
										 flag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mutex_lock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
						if (nfct_bsa)
 | 
				
			||||||
 | 
							ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag);
 | 
				
			||||||
 | 
						mutex_unlock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool sock_addr_is_valid_access(int off, int size,
 | 
					static bool sock_addr_is_valid_access(int off, int size,
 | 
				
			||||||
				      enum bpf_access_type type,
 | 
									      enum bpf_access_type type,
 | 
				
			||||||
				      const struct bpf_prog *prog,
 | 
									      const struct bpf_prog *prog,
 | 
				
			||||||
| 
						 | 
					@ -10557,6 +10609,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
 | 
				
			||||||
	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
 | 
						.convert_ctx_access	= tc_cls_act_convert_ctx_access,
 | 
				
			||||||
	.gen_prologue		= tc_cls_act_prologue,
 | 
						.gen_prologue		= tc_cls_act_prologue,
 | 
				
			||||||
	.gen_ld_abs		= bpf_gen_ld_abs,
 | 
						.gen_ld_abs		= bpf_gen_ld_abs,
 | 
				
			||||||
 | 
						.btf_struct_access	= tc_cls_act_btf_struct_access,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const struct bpf_prog_ops tc_cls_act_prog_ops = {
 | 
					const struct bpf_prog_ops tc_cls_act_prog_ops = {
 | 
				
			||||||
| 
						 | 
					@ -10568,6 +10621,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
 | 
				
			||||||
	.is_valid_access	= xdp_is_valid_access,
 | 
						.is_valid_access	= xdp_is_valid_access,
 | 
				
			||||||
	.convert_ctx_access	= xdp_convert_ctx_access,
 | 
						.convert_ctx_access	= xdp_convert_ctx_access,
 | 
				
			||||||
	.gen_prologue		= bpf_noop_prologue,
 | 
						.gen_prologue		= bpf_noop_prologue,
 | 
				
			||||||
 | 
						.btf_struct_access	= xdp_btf_struct_access,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const struct bpf_prog_ops xdp_prog_ops = {
 | 
					const struct bpf_prog_ops xdp_prog_ops = {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,8 +6,10 @@
 | 
				
			||||||
 * are exposed through to BPF programs is explicitly unstable.
 | 
					 * are exposed through to BPF programs is explicitly unstable.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/bpf_verifier.h>
 | 
				
			||||||
#include <linux/bpf.h>
 | 
					#include <linux/bpf.h>
 | 
				
			||||||
#include <linux/btf.h>
 | 
					#include <linux/btf.h>
 | 
				
			||||||
 | 
					#include <linux/mutex.h>
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/btf_ids.h>
 | 
					#include <linux/btf_ids.h>
 | 
				
			||||||
#include <linux/net_namespace.h>
 | 
					#include <linux/net_namespace.h>
 | 
				
			||||||
| 
						 | 
					@ -184,6 +186,54 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 | 
				
			||||||
	return ct;
 | 
						return ct;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BTF_ID_LIST(btf_nf_conn_ids)
 | 
				
			||||||
 | 
					BTF_ID(struct, nf_conn)
 | 
				
			||||||
 | 
					BTF_ID(struct, nf_conn___init)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Check writes into `struct nf_conn` */
 | 
				
			||||||
 | 
					static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
 | 
				
			||||||
 | 
										   const struct btf *btf,
 | 
				
			||||||
 | 
										   const struct btf_type *t, int off,
 | 
				
			||||||
 | 
										   int size, enum bpf_access_type atype,
 | 
				
			||||||
 | 
										   u32 *next_btf_id,
 | 
				
			||||||
 | 
										   enum bpf_type_flag *flag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						const struct btf_type *ncit;
 | 
				
			||||||
 | 
						const struct btf_type *nct;
 | 
				
			||||||
 | 
						size_t end;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
 | 
				
			||||||
 | 
						nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (t != nct && t != ncit) {
 | 
				
			||||||
 | 
							bpf_log(log, "only read is supported\n");
 | 
				
			||||||
 | 
							return -EACCES;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* `struct nf_conn` and `struct nf_conn___init` have the same layout
 | 
				
			||||||
 | 
						 * so we are safe to simply merge offset checks here
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						switch (off) {
 | 
				
			||||||
 | 
					#if defined(CONFIG_NF_CONNTRACK_MARK)
 | 
				
			||||||
 | 
						case offsetof(struct nf_conn, mark):
 | 
				
			||||||
 | 
							end = offsetofend(struct nf_conn, mark);
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						default:
 | 
				
			||||||
 | 
							bpf_log(log, "no write support to nf_conn at off %d\n", off);
 | 
				
			||||||
 | 
							return -EACCES;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (off + size > end) {
 | 
				
			||||||
 | 
							bpf_log(log,
 | 
				
			||||||
 | 
								"write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
 | 
				
			||||||
 | 
								off, size, end);
 | 
				
			||||||
 | 
							return -EACCES;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__diag_push();
 | 
					__diag_push();
 | 
				
			||||||
__diag_ignore_all("-Wmissing-prototypes",
 | 
					__diag_ignore_all("-Wmissing-prototypes",
 | 
				
			||||||
		  "Global functions as their definitions will be in nf_conntrack BTF");
 | 
							  "Global functions as their definitions will be in nf_conntrack BTF");
 | 
				
			||||||
| 
						 | 
					@ -449,5 +499,19 @@ int register_nf_conntrack_bpf(void)
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
 | 
						ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
 | 
				
			||||||
	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
 | 
						ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
 | 
				
			||||||
 | 
						if (!ret) {
 | 
				
			||||||
 | 
							mutex_lock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
							nfct_bsa = _nf_conntrack_btf_struct_access;
 | 
				
			||||||
 | 
							mutex_unlock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cleanup_nf_conntrack_bpf(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						mutex_lock(&nf_conn_btf_access_lock);
 | 
				
			||||||
 | 
						nfct_bsa = NULL;
 | 
				
			||||||
 | 
						mutex_unlock(&nf_conn_btf_access_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2512,6 +2512,7 @@ static int kill_all(struct nf_conn *i, void *data)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void nf_conntrack_cleanup_start(void)
 | 
					void nf_conntrack_cleanup_start(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						cleanup_nf_conntrack_bpf();
 | 
				
			||||||
	conntrack_gc_work.exiting = true;
 | 
						conntrack_gc_work.exiting = true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue