mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	Currently, tracex2 does not print the histogram properly and its
results are misleading (all results are reported as 0).
The problem is caused by a change in arguments of the function to which
the kprobe connects. This tracex2 bpf program uses kprobe (attached
to __x64_sys_write) to figure out the size of the write system call. In
order to achieve this, the third argument 'count' must be intact.
The following is a prototype of the sys_write variant. (checked with
pfunct)
    ~/git/linux$ pfunct -P fs/read_write.o | grep sys_write
    ssize_t ksys_write(unsigned int fd, const char  * buf, size_t count);
    long int __x64_sys_write(const struct pt_regs  * regs);
    ... cross compile with s390x ...
    long int __s390_sys_write(struct pt_regs * regs);
Because a SYSCALL_WRAPPER function receives its arguments wrapped in a
single struct pt_regs pointer, an additional extraction step is required
to parse the original syscall arguments properly.
    #define BPF_KSYSCALL(name, args...)
    ... snip ...
    struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER                    \
			   ? (struct pt_regs *)PT_REGS_PARM1(ctx)       \
			   : ctx;                                       \
To fix this problem, the BPF_KSYSCALL macro is used. This removes the
hassle of parsing the arguments out of pt_regs by hand. Since the macro
uses the CO-RE version of argument extraction, additional portability
comes with it.
Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20221224071527.2292-5-danieltimlee@gmail.com
		
	
			
		
			
				
	
	
		
			99 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			99 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
 | 
						|
#include "vmlinux.h"
 | 
						|
#include <linux/version.h>
 | 
						|
#include <bpf/bpf_helpers.h>
 | 
						|
#include <bpf/bpf_tracing.h>
 | 
						|
#include <bpf/bpf_core_read.h>
 | 
						|
 | 
						|
struct {
 | 
						|
	__uint(type, BPF_MAP_TYPE_HASH);
 | 
						|
	__type(key, long);
 | 
						|
	__type(value, long);
 | 
						|
	__uint(max_entries, 1024);
 | 
						|
} my_map SEC(".maps");
 | 
						|
 | 
						|
/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
 | 
						|
 * example will no longer be meaningful
 | 
						|
 */
 | 
						|
SEC("kprobe/kfree_skb_reason")
 | 
						|
int bpf_prog2(struct pt_regs *ctx)
 | 
						|
{
 | 
						|
	long loc = 0;
 | 
						|
	long init_val = 1;
 | 
						|
	long *value;
 | 
						|
 | 
						|
	/* read ip of kfree_skb_reason caller.
 | 
						|
	 * non-portable version of __builtin_return_address(0)
 | 
						|
	 */
 | 
						|
	BPF_KPROBE_READ_RET_IP(loc, ctx);
 | 
						|
 | 
						|
	value = bpf_map_lookup_elem(&my_map, &loc);
 | 
						|
	if (value)
 | 
						|
		*value += 1;
 | 
						|
	else
 | 
						|
		bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Integer base-2 logarithm, rounded down, of a value of up to 32 bits.
 *
 * The bit position is found by a loop-free binary search: each step tests
 * whether the remaining value still has bits above the lower half of the
 * current window and, if so, shifts them down and records the shift
 * amount in the result.
 *
 * Returns the index of the highest set bit of @v; returns 0 for both
 * v == 0 and v == 1.
 */
static unsigned int log2(unsigned int v)
{
	unsigned int r;
	unsigned int shift;

	r = (v > 0xFFFF) << 4; v >>= r;
	shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
	shift = (v > 0xF) << 2; v >>= shift; r |= shift;
	shift = (v > 0x3) << 1; v >>= shift; r |= shift;
	/* v is now in 0..3; its bit 1 is the last bit of the result */
	r |= (v >> 1);
	return r;
}
 | 
						|
 | 
						|
/* Integer base-2 logarithm, rounded down, of an unsigned long.
 *
 * Splits the value into its upper and lower 32 bits and delegates to
 * log2(): when any of the upper bits are set the result is
 * log2(upper half) + 32, otherwise it is log2(lower half).
 */
static unsigned int log2l(unsigned long v)
{
	/* Widen before shifting so the shift count stays below the operand
	 * width even where unsigned long is only 32 bits wide.
	 */
	unsigned int hi = (unsigned long long)v >> 32;

	if (hi)
		return log2(hi) + 32;
	else
		return log2(v);
}
 | 
						|
 | 
						|
struct hist_key {
 | 
						|
	char comm[16];
 | 
						|
	u64 pid_tgid;
 | 
						|
	u64 uid_gid;
 | 
						|
	u64 index;
 | 
						|
};
 | 
						|
 | 
						|
struct {
 | 
						|
	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
 | 
						|
	__uint(key_size, sizeof(struct hist_key));
 | 
						|
	__uint(value_size, sizeof(long));
 | 
						|
	__uint(max_entries, 1024);
 | 
						|
} my_hist_map SEC(".maps");
 | 
						|
 | 
						|
/* Builds a log2 histogram of write() sizes.
 *
 * Attached to the write syscall through BPF_KSYSCALL, which hands the
 * program the syscall arguments (fd, buf, count) directly. For each call
 * the key is made of the current task's comm, pid/tgid, uid/gid and the
 * log2 bucket of @count; the matching counter in my_hist_map is
 * incremented, or created with the value 1 if it does not exist yet.
 * Always returns 0.
 */
SEC("ksyscall/write")
int BPF_KSYSCALL(bpf_prog3, unsigned int fd, const char *buf, size_t count)
{
	long init_val = 1;
	long *value;
	struct hist_key key;

	/* every field of the key is assigned before the key is used */
	key.index = log2l(count);
	key.pid_tgid = bpf_get_current_pid_tgid();
	key.uid_gid = bpf_get_current_uid_gid();
	bpf_get_current_comm(&key.comm, sizeof(key.comm));

	value = bpf_map_lookup_elem(&my_hist_map, &key);
	if (value)
		__sync_fetch_and_add(value, 1);
	else
		bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
	return 0;
}
 | 
						|
char _license[] SEC("license") = "GPL";
 | 
						|
u32 _version SEC("version") = LINUX_VERSION_CODE;
 |