mirror of https://github.com/torvalds/linux.git
bpf-next-6.18
-----BEGIN PGP SIGNATURE-----

iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmjZH40ACgkQ6rmadz2v
bTrG7w//X/5CyDoKIYJCqynYRdMtfqYuCe8Jhud4p5++iBVqkDyS6Y8EFLqZVyg/
UHTqaSE4Nz8/pma0WSjhUYn6Chs1AeH+Rw/g109SovE/YGkek2KNwY3o2hDrtPMX
+oD0my8qF2HLKgEyteXXyZ5Ju+AaF92JFiGko4/wNTX8O99F9nyz2pTkrctS9Vl9
VwuTxrEXpmhqrhP3WCxkfNfcbs9HP+AALpgOXZKdMI6T4KI0N1gnJ0ZWJbiXZ8oT
tug0MTPkNRidYMl0wHY2LZ6ZG8Q3a7Sgc+M0xFzaHGvGlJbBg1HjsDMtT6j34CrG
TIVJ/O8F6EJzAnQ5Hio0FJk8IIgMRgvng5Kd5GXidU+mE6zokTyHIHOXitYkBQNH
Hk+lGA7+E2cYqUqKvB5PFoyo+jlucuIH7YwrQlyGfqz+98n65xCgZKcmdVXr0hdB
9v3WmwJFtVIoPErUvBC3KRANQYhFk4eVk1eiGV/20+eIVyUuNbX6wqSWSA9uEXLy
n5fm/vlk4RjZmrPZHxcJ0dsl9LTF1VvQQHkgoC1Sz/Cc+jA6k4I+ECVHAqEbk36p
1TUF52yPOD2ViaJKkj+962JaaaXlUn6+Dq7f1GMP6VuyHjz4gsI3mOo4XarqNdWd
c7TnYmlGO/cGwqd4DdbmWiF1DDsrBcBzdbC8+FgffxQHLPXGzUg=
=LeQi
-----END PGP SIGNATURE-----

Merge tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Pull bpf updates from Alexei Starovoitov:

 - Support pulling non-linear xdp data with bpf_xdp_pull_data() kfunc
   (Amery Hung)

   Applied as a stable branch in bpf-next and net-next trees.

 - Support reading skb metadata via bpf_dynptr (Jakub Sitnicki)

   Also a stable branch in bpf-next and net-next trees.

 - Enforce expected_attach_type for tailcall compatibility (Daniel Borkmann)

 - Replace path-sensitive with path-insensitive live stack analysis in
   the verifier (Eduard Zingerman)

   This is a significant change in the verification logic. More details,
   motivation, and long term plans are in the cover letter/merge commit.

 - Support signed BPF programs (KP Singh)

   This is another major feature that took years to materialize.
   Algorithm details are in the cover letter/merge commit.

 - Add support for may_goto instruction to s390 JIT (Ilya Leoshkevich)

 - Add support for may_goto instruction to arm64 JIT (Puranjay Mohan)

 - Fix USDT SIB argument handling in libbpf (Jiawei Zhao)

 - Allow uprobe-bpf program to change context registers (Jiri Olsa)

 - Support signed loads from BPF arena (Kumar Kartikeya Dwivedi and
   Puranjay Mohan)

 - Allow access to union arguments in tracing programs (Leon Hwang)

 - Optimize rcu_read_lock() + migrate_disable() combination where it's
   used in BPF subsystem (Menglong Dong)

 - Introduce bpf_task_work_schedule*() kfuncs to schedule deferred
   execution of a BPF callback in the context of a specific task using
   the kernel's task_work infrastructure (Mykyta Yatsenko)

 - Enforce RCU protection for KF_RCU_PROTECTED kfuncs (Kumar Kartikeya
   Dwivedi)

 - Add stress test for rqspinlock in NMI (Kumar Kartikeya Dwivedi)

 - Improve the precision of the tnum multiplier verifier operation
   (Nandakumar Edamana)

 - Use tnums to improve is_branch_taken() logic (Paul Chaignon)

 - Add support for atomic operations in arena in riscv JIT (Pu Lehui)

 - Report arena faults to BPF error stream (Puranjay Mohan)

 - Search for tracefs at /sys/kernel/tracing first in bpftool (Quentin
   Monnet)

 - Add bpf_strcasecmp() kfunc (Rong Tao)

 - Support lookup_and_delete_elem command in BPF_MAP_STACK_TRACE (Tao
   Chen)

* tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (197 commits)
  libbpf: Replace AF_ALG with open coded SHA-256
  selftests/bpf: Add stress test for rqspinlock in NMI
  selftests/bpf: Add test case for different expected_attach_type
  bpf: Enforce expected_attach_type for tailcall compatibility
  bpftool: Remove duplicate string.h header
  bpf: Remove duplicate crypto/sha2.h header
  libbpf: Fix error when st-prefix_ops and ops from differ btf
  selftests/bpf: Test changing packet data from kfunc
  selftests/bpf: Add stacktrace map lookup_and_delete_elem test case
  selftests/bpf: Refactor stacktrace_map case with skeleton
  bpf: Add lookup_and_delete_elem for BPF_MAP_STACK_TRACE
  selftests/bpf: Fix flaky bpf_cookie selftest
  selftests/bpf: Test changing packet data from global functions with a kfunc
  bpf: Emit struct bpf_xdp_sock type in vmlinux BTF
  selftests/bpf: Task_work selftest cleanup fixes
  MAINTAINERS: Delete inactive maintainers from AF_XDP
  bpf: Mark kfuncs as __noclone
  selftests/bpf: Add kprobe multi write ctx attach test
  selftests/bpf: Add kprobe write ctx attach test
  selftests/bpf: Add uprobe context ip register change test
  ...
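To illustrate the first item in the list above, here is a minimal sketch of how an XDP program might use the new kfunc to linearize the front of a multi-fragment frame. The kfunc name comes from the pull message; the exact signature, the __ksym declaration, and the 128-byte pull length are illustrative assumptions, not taken from this page:

	/* Sketch only: signature assumed for illustration. */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

	SEC("xdp")
	int pull_headers(struct xdp_md *ctx)
	{
		void *data, *data_end;

		/* Make the first 128 bytes linear so direct packet access
		 * works even when the frame arrived as non-linear fragments.
		 */
		if (bpf_xdp_pull_data(ctx, 128))
			return XDP_PASS;	/* could not pull, fall back */

		data = (void *)(long)ctx->data;
		data_end = (void *)(long)ctx->data_end;
		if (data + 128 > data_end)
			return XDP_PASS;

		/* ... parse headers in the now-linear region ... */
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";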
This commit is contained in: commit ae28ed4578

254 changed files with 11853 additions and 2817 deletions
CREDITS (6 changes)

@@ -3912,6 +3912,12 @@ S: C/ Federico Garcia Lorca 1 10-A
 S: Sevilla 41005
 S: Spain
 
+N: Björn Töpel
+E: bjorn@kernel.org
+D: AF_XDP
+S: Gothenburg
+S: Sweden
+
 N: Linus Torvalds
 E: torvalds@linux-foundation.org
 D: Original kernel hacker
Documentation/bpf/kfuncs.rst

@@ -335,9 +335,26 @@ consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
 pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
 also be KF_RET_NULL.
 
+2.4.8 KF_RCU_PROTECTED flag
+---------------------------
+
+The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
+an RCU critical section. This is assumed by default in non-sleepable programs,
+and must be explicitly ensured by calling ``bpf_rcu_read_lock`` for sleepable
+ones.
+
+If the kfunc returns a pointer value, this flag also enforces that the returned
+pointer is RCU protected, and can only be used while the RCU critical section is
+active.
+
+The flag is distinct from the ``KF_RCU`` flag, which only ensures that its
+arguments are at least RCU protected pointers. This may transitively imply that
+RCU protection is ensured, but it does not work in cases of kfuncs which require
+RCU protection but do not take RCU protected arguments.
+
 .. _KF_deprecated_flag:
 
-2.4.8 KF_DEPRECATED flag
+2.4.9 KF_DEPRECATED flag
 ------------------------
 
 The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
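The new KF_RCU_PROTECTED section above describes when the flag applies; for readers who want to see it in use, the following is a minimal registration sketch. The kfunc name and body are hypothetical, only the flag macros and the BTF_KFUNCS_START/BTF_ID_FLAGS pattern follow the kernel's existing kfunc registration convention:

	/* Hypothetical kfunc: returns an RCU-protected, possibly-NULL
	 * pointer, so it is registered KF_RCU_PROTECTED | KF_RET_NULL.
	 */
	__bpf_kfunc struct task_struct *bpf_example_task_peek(int pid)
	{
		return pid_task(find_vpid(pid), PIDTYPE_PID);
	}

	BTF_KFUNCS_START(example_kfunc_ids)
	BTF_ID_FLAGS(func, bpf_example_task_peek, KF_RCU_PROTECTED | KF_RET_NULL)
	BTF_KFUNCS_END(example_kfunc_ids)

A sleepable program would have to wrap calls to such a kfunc in bpf_rcu_read_lock()/bpf_rcu_read_unlock(); non-sleepable programs are already in an RCU critical section.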
Documentation/bpf/verifier.rst
@@ -347,270 +347,6 @@ However, only the value of register ``r1`` is important to successfully finish
 verification. The goal of the liveness tracking algorithm is to spot this fact
 and figure out that both states are actually equivalent.
 
-Data structures
-~~~~~~~~~~~~~~~
-
-Liveness is tracked using the following data structures::
-
-  enum bpf_reg_liveness {
-	REG_LIVE_NONE = 0,
-	REG_LIVE_READ32 = 0x1,
-	REG_LIVE_READ64 = 0x2,
-	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
-	REG_LIVE_WRITTEN = 0x4,
-	REG_LIVE_DONE = 0x8,
-  };
-
-  struct bpf_reg_state {
-	...
-	struct bpf_reg_state *parent;
-	...
-	enum bpf_reg_liveness live;
-	...
-  };
-
-  struct bpf_stack_state {
-	struct bpf_reg_state spilled_ptr;
-	...
-  };
-
-  struct bpf_func_state {
-	struct bpf_reg_state regs[MAX_BPF_REG];
-	...
-	struct bpf_stack_state *stack;
-  }
-
-  struct bpf_verifier_state {
-	struct bpf_func_state *frame[MAX_CALL_FRAMES];
-	struct bpf_verifier_state *parent;
-	...
-  }
-
-* ``REG_LIVE_NONE`` is an initial value assigned to ``->live`` fields upon new
-  verifier state creation;
-
-* ``REG_LIVE_WRITTEN`` means that the value of the register (or stack slot) is
-  defined by some instruction verified between this verifier state's parent and
-  verifier state itself;
-
-* ``REG_LIVE_READ{32,64}`` means that the value of the register (or stack slot)
-  is read by a some child state of this verifier state;
-
-* ``REG_LIVE_DONE`` is a marker used by ``clean_verifier_state()`` to avoid
-  processing same verifier state multiple times and for some sanity checks;
-
-* ``->live`` field values are formed by combining ``enum bpf_reg_liveness``
-  values using bitwise or.
-
-Register parentage chains
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to propagate information between parent and child states, a *register
-parentage chain* is established. Each register or stack slot is linked to a
-corresponding register or stack slot in its parent state via a ``->parent``
-pointer. This link is established upon state creation in ``is_state_visited()``
-and might be modified by ``set_callee_state()`` called from
-``__check_func_call()``.
-
-The rules for correspondence between registers / stack slots are as follows:
-
-* For the current stack frame, registers and stack slots of the new state are
-  linked to the registers and stack slots of the parent state with the same
-  indices.
-
-* For the outer stack frames, only callee saved registers (r6-r9) and stack
-  slots are linked to the registers and stack slots of the parent state with the
-  same indices.
-
-* When function call is processed a new ``struct bpf_func_state`` instance is
-  allocated, it encapsulates a new set of registers and stack slots. For this
-  new frame, parent links for r6-r9 and stack slots are set to nil, parent links
-  for r1-r5 are set to match caller r1-r5 parent links.
-
-This could be illustrated by the following diagram (arrows stand for
-``->parent`` pointers)::
-
-  ...                    ; Frame #0, some instructions
-  --- checkpoint #0 ---
-  1 : r6 = 42            ; Frame #0
-  --- checkpoint #1 ---
-  2 : call foo()         ; Frame #0
-  ...                    ; Frame #1, instructions from foo()
-  --- checkpoint #2 ---
-  ...                    ; Frame #1, instructions from foo()
-  --- checkpoint #3 ---
-  exit                   ; Frame #1, return from foo()
-  3 : r1 = r6            ; Frame #0 <- current state
-
-             +-------------------------------+-------------------------------+
-             |           Frame #0            |           Frame #1            |
-  Checkpoint +-------------------------------+-------------------------------+
-  #0         | r0 | r1-r5 | r6-r9 | fp-8 ... |
-             +-------------------------------+
-                ^      ^      ^       ^
-                |      |      |       |
-  Checkpoint +-------------------------------+
-  #1         | r0 | r1-r5 | r6-r9 | fp-8 ... |
-             +-------------------------------+
-                       ^      ^       ^
-                |______|______|_______|_______________
-                |      |      |                       |
-          nil  nil     |      |       |      nil     nil
-           |    |      |      |       |       |       |
-  Checkpoint +-------------------------------+-------------------------------+
-  #2         | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
-             +-------------------------------+-------------------------------+
-                              ^       ^         ^       ^       ^
-          nil  nil            |       |         |       |       |
-           |    |             |       |         |       |       |
-  Checkpoint +-------------------------------+-------------------------------+
-  #3         | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
-             +-------------------------------+-------------------------------+
-                              ^       ^
-          nil  nil            |       |
-           |    |             |       |
-  Current    +-------------------------------+
-  state      | r0 | r1-r5 | r6-r9 | fp-8 ... |
-             +-------------------------------+
-                               \
-                                 r6 read mark is propagated via these links
-                                 all the way up to checkpoint #1.
-                                 The checkpoint #1 contains a write mark for r6
-                                 because of instruction (1), thus read propagation
-                                 does not reach checkpoint #0 (see section below).
-
-Liveness marks tracking
-~~~~~~~~~~~~~~~~~~~~~~~
-
-For each processed instruction, the verifier tracks read and written registers
-and stack slots. The main idea of the algorithm is that read marks propagate
-back along the state parentage chain until they hit a write mark, which 'screens
-off' earlier states from the read. The information about reads is propagated by
-function ``mark_reg_read()`` which could be summarized as follows::
-
-  mark_reg_read(struct bpf_reg_state *state, ...):
-      parent = state->parent
-      while parent:
-          if state->live & REG_LIVE_WRITTEN:
-              break
-          if parent->live & REG_LIVE_READ64:
-              break
-          parent->live |= REG_LIVE_READ64
-          state = parent
-          parent = state->parent
-
-Notes:
-
-* The read marks are applied to the **parent** state while write marks are
-  applied to the **current** state. The write mark on a register or stack slot
-  means that it is updated by some instruction in the straight-line code leading
-  from the parent state to the current state.
-
-* Details about REG_LIVE_READ32 are omitted.
-
-* Function ``propagate_liveness()`` (see section :ref:`read_marks_for_cache_hits`)
-  might override the first parent link. Please refer to the comments in the
-  ``propagate_liveness()`` and ``mark_reg_read()`` source code for further
-  details.
-
-Because stack writes could have different sizes ``REG_LIVE_WRITTEN`` marks are
-applied conservatively: stack slots are marked as written only if write size
-corresponds to the size of the register, e.g. see function ``save_register_state()``.
-
-Consider the following example::
-
-  0: (*u64)(r10 - 8) = 0   ; define 8 bytes of fp-8
-  --- checkpoint #0 ---
-  1: (*u32)(r10 - 8) = 1   ; redefine lower 4 bytes
-  2: r1 = (*u32)(r10 - 8)  ; read lower 4 bytes defined at (1)
-  3: r2 = (*u32)(r10 - 4)  ; read upper 4 bytes defined at (0)
-
-As stated above, the write at (1) does not count as ``REG_LIVE_WRITTEN``. Should
-it be otherwise, the algorithm above wouldn't be able to propagate the read mark
-from (3) to checkpoint #0.
-
-Once the ``BPF_EXIT`` instruction is reached ``update_branch_counts()`` is
-called to update the ``->branches`` counter for each verifier state in a chain
-of parent verifier states. When the ``->branches`` counter reaches zero the
-verifier state becomes a valid entry in a set of cached verifier states.
-
-Each entry of the verifier states cache is post-processed by a function
-``clean_live_states()``. This function marks all registers and stack slots
-without ``REG_LIVE_READ{32,64}`` marks as ``NOT_INIT`` or ``STACK_INVALID``.
-Registers/stack slots marked in this way are ignored in function ``stacksafe()``
-called from ``states_equal()`` when a state cache entry is considered for
-equivalence with a current state.
-
-Now it is possible to explain how the example from the beginning of the section
-works::
-
-  0: call bpf_get_prandom_u32()
-  1: r1 = 0
-  2: if r0 == 0 goto +1
-  3: r0 = 1
-  --- checkpoint[0] ---
-  4: r0 = r1
-  5: exit
-
-* At instruction #2 branching point is reached and state ``{ r0 == 0, r1 == 0, pc == 4 }``
-  is pushed to states processing queue (pc stands for program counter).
-
-* At instruction #4:
-
-  * ``checkpoint[0]`` states cache entry is created: ``{ r0 == 1, r1 == 0, pc == 4 }``;
-  * ``checkpoint[0].r0`` is marked as written;
-  * ``checkpoint[0].r1`` is marked as read;
-
-* At instruction #5 exit is reached and ``checkpoint[0]`` can now be processed
-  by ``clean_live_states()``. After this processing ``checkpoint[0].r1`` has a
-  read mark and all other registers and stack slots are marked as ``NOT_INIT``
-  or ``STACK_INVALID``
-
-* The state ``{ r0 == 0, r1 == 0, pc == 4 }`` is popped from the states queue
-  and is compared against a cached state ``{ r1 == 0, pc == 4 }``, the states
-  are considered equivalent.
-
-.. _read_marks_for_cache_hits:
-
-Read marks propagation for cache hits
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Another point is the handling of read marks when a previously verified state is
-found in the states cache. Upon cache hit verifier must behave in the same way
-as if the current state was verified to the program exit. This means that all
-read marks, present on registers and stack slots of the cached state, must be
-propagated over the parentage chain of the current state. Example below shows
-why this is important. Function ``propagate_liveness()`` handles this case.
-
-Consider the following state parentage chain (S is a starting state, A-E are
-derived states, -> arrows show which state is derived from which)::
-
-                   r1 read
-               <-------------               A[r1] == 0
-                                            C[r1] == 0
-       S ---> A ---> B ---> exit            E[r1] == 1
-       |
-       ` ---> C ---> D
-       |
-       ` ---> E           ^
-                          |___  suppose all these
-                  ^             states are at insn #Y
-                  |
-           suppose all these
-           states are at insn #X
-
-* Chain of states ``S -> A -> B -> exit`` is verified first.
-
-* While ``B -> exit`` is verified, register ``r1`` is read and this read mark is
-  propagated up to state ``A``.
-
-* When chain of states ``C -> D`` is verified the state ``D`` turns out to be
-  equivalent to state ``B``.
-
-* The read mark for ``r1`` has to be propagated to state ``C``, otherwise state
-  ``C`` might get mistakenly marked as equivalent to state ``E`` even though
-  values for register ``r1`` differ between ``C`` and ``E``.
-
 Understanding eBPF verifier messages
 ====================================
 
MAINTAINERS
@@ -27466,10 +27466,8 @@ F: tools/testing/selftests/bpf/*xdp*
 K: (?:\b|_)xdp(?:\b|_)
 
 XDP SOCKETS (AF_XDP)
-M: Björn Töpel <bjorn@kernel.org>
 M: Magnus Karlsson <magnus.karlsson@intel.com>
 M: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
-R: Jonathan Lemon <jonathan.lemon@gmail.com>
 R: Stanislav Fomichev <sdf@fomichev.me>
 L: netdev@vger.kernel.org
 L: bpf@vger.kernel.org
arch/arm64/net/Makefile
@@ -2,4 +2,4 @@
 #
 # ARM64 networking code
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
arch/arm64/net/bpf_jit_comp.c
@@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
 	emit(A64_RET(A64_LR), ctx);
 }
 
-#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
+/*
+ * Metadata encoding for exception handling in JITed code.
+ *
+ * Format of `fixup` field in `struct exception_table_entry`:
+ *
+ * Bit layout of `fixup` (32-bit):
+ *
+ * +-----------+--------+-----------+-----------+----------+
+ * |   31-27   | 26-22  |    21     |   20-16   |   15-0   |
+ * |           |        |           |           |          |
+ * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG |  OFFSET  |
+ * +-----------+--------+-----------+-----------+----------+
+ *
+ * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
+ * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
+ *   accessing the arena region.
+ * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
+ * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
+ *   DONT_CLEAR if it is a store instruction.
+ */
+
+#define BPF_FIXUP_OFFSET_MASK		GENMASK(15, 0)
+#define BPF_FIXUP_ARENA_REG_MASK	GENMASK(20, 16)
+#define BPF_ARENA_ACCESS		BIT(21)
 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
 
 bool ex_handler_bpf(const struct exception_table_entry *ex,
 		    struct pt_regs *regs)
 {
-	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+	s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+	int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
+	bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
+	bool is_write = (dst_reg == DONT_CLEAR);
+	unsigned long addr;
+
+	if (is_arena) {
+		addr = regs->regs[arena_reg] + off;
+		bpf_prog_report_arena_violation(is_write, addr, regs->pc);
+	}
 
 	if (dst_reg != DONT_CLEAR)
 		regs->regs[dst_reg] = 0;
-	regs->pc = (unsigned long)&ex->fixup - offset;
+	/* Skip the faulting instruction */
+	regs->pc += AARCH64_INSN_SIZE;
 
 	return true;
 }
 
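To make the encoding above concrete, here is a small sketch of how one fixup word is packed and later decoded with these masks. The register numbers and offset are invented; FIELD_PREP()/FIELD_GET() are the kernel's bitfield helpers from <linux/bitfield.h>:

	/* Sketch: pack a fixup for an arena load into x7, address base in x2,
	 * load offset 0x40; then decode it the way ex_handler_bpf() does. */
	u32 fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, 7) |		/* clear x7 on fault */
		    BPF_ARENA_ACCESS |				/* arena access bit  */
		    FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, 2) |	/* address base: x2  */
		    FIELD_PREP(BPF_FIXUP_OFFSET_MASK, 0x40);	/* load/store offset */

	int dst_reg   = FIELD_GET(BPF_FIXUP_REG_MASK, fixup);		/* 7    */
	s16 off       = FIELD_GET(BPF_FIXUP_OFFSET_MASK, fixup);	/* 0x40 */
	int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, fixup);	/* 2    */
	bool is_arena = !!(fixup & BPF_ARENA_ACCESS);			/* true */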
@@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
 				 int dst_reg)
 {
 	off_t ins_offset;
-	off_t fixup_offset;
+	s16 off = insn->off;
+	bool is_arena;
+	int arena_reg;
 	unsigned long pc;
 	struct exception_table_entry *ex;
 
@@ -1097,11 +1133,16 @@ static int add_exception_handler(const struct bpf_insn *insn,
 		return 0;
 
 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
 	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
 	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
+	    BPF_MODE(insn->code) != BPF_PROBE_MEM32SX &&
 	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
 		return 0;
 
+	is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
+		   (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) ||
+		   (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);
+
 	if (!ctx->prog->aux->extable ||
 	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
 		return -EINVAL;

@@ -1119,22 +1160,6 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
 		return -ERANGE;
 
-	/*
-	 * Since the extable follows the program, the fixup offset is always
-	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
-	 * to keep things simple, and put the destination register in the upper
-	 * bits. We don't need to worry about buildtime or runtime sort
-	 * modifying the upper bits because the table is already sorted, and
-	 * isn't part of the main exception table.
-	 *
-	 * The fixup_offset is set to the next instruction from the instruction
-	 * that may fault. The execution will jump to this after handling the
-	 * fault.
-	 */
-	fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
-	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
-		return -ERANGE;
-
 	/*
 	 * The offsets above have been calculated using the RO buffer but we
 	 * need to use the R/W buffer for writes.

@@ -1147,8 +1172,26 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	if (BPF_CLASS(insn->code) != BPF_LDX)
 		dst_reg = DONT_CLEAR;
 
-	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
-		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+	ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+	if (is_arena) {
+		ex->fixup |= BPF_ARENA_ACCESS;
+		/*
+		 * insn->src_reg/dst_reg holds the address in the arena region with upper 32-bits
+		 * being zero because of a preceding addr_space_cast(r<n>, 0x0, 0x1) instruction.
+		 * This address is adjusted with the addition of arena_vm_start (see the
+		 * implementation of BPF_PROBE_MEM32 and BPF_PROBE_ATOMIC) before being used for the
+		 * memory access. Pass the reg holding the unmodified 32-bit address to
+		 * ex_handler_bpf.
+		 */
+		if (BPF_CLASS(insn->code) == BPF_LDX)
+			arena_reg = bpf2a64[insn->src_reg];
+		else
+			arena_reg = bpf2a64[insn->dst_reg];
+
+		ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
+			     FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
+	}
 
 	ex->type = EX_TYPE_BPF;
 
@@ -1558,7 +1601,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			if (ret < 0)
 				return ret;
 			emit_call(func_addr, ctx);
-			emit(A64_MOV(1, r0, A64_R(0)), ctx);
+			/*
+			 * Call to arch_bpf_timed_may_goto() is emitted by the
+			 * verifier and called with custom calling convention with
+			 * first argument and return value in BPF_REG_AX (x9).
+			 */
+			if (func_addr != (u64)arch_bpf_timed_may_goto)
+				emit(A64_MOV(1, r0, A64_R(0)), ctx);
 			break;
 		}
 		/* tail call */

@@ -1612,7 +1661,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
-		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
+	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
+		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+		    BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) {
 			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
 			src = tmp2;
 		}

@@ -1624,7 +1677,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			off_adj = off;
 		}
 		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
-			       BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
+			       BPF_MODE(insn->code) == BPF_PROBE_MEMSX ||
+			       BPF_MODE(insn->code) == BPF_PROBE_MEM32SX);
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
 			if (is_lsi_offset(off_adj, 2)) {

@@ -1832,9 +1886,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		if (ret)
 			return ret;
 
-		ret = add_exception_handler(insn, ctx, dst);
-		if (ret)
-			return ret;
+		if (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
+			ret = add_exception_handler(insn, ctx, dst);
+			if (ret)
+				return ret;
+		}
 		break;
 
 	default:

@@ -2767,7 +2823,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
 		goto out;
 	}
 
-	bpf_flush_icache(ro_image, ro_image + size);
 out:
 	kvfree(image);
 	return ret;

@@ -3038,6 +3093,11 @@ bool bpf_jit_bypass_spec_v4(void)
 	return true;
 }
 
+bool bpf_jit_supports_timed_may_goto(void)
+{
+	return true;
+}
+
 bool bpf_jit_inlines_helper_call(s32 imm)
 {
 	switch (imm) {

@@ -3064,8 +3124,7 @@ void bpf_jit_free(struct bpf_prog *prog)
 	 * before freeing it.
 	 */
 	if (jit_data) {
-		bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
-				   sizeof(jit_data->header->size));
 		bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
 		kfree(jit_data);
 	}
 	prog->bpf_func -= cfi_get_offset();
arch/arm64/net/bpf_timed_may_goto.S (new file, 40 lines)
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Puranjay Mohan <puranjay@kernel.org> */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+	/* Allocate stack space and emit frame record */
+	stp	x29, x30, [sp, #-64]!
+	mov	x29, sp
+
+	/* Save BPF registers R0 - R5 (x7, x0-x4) */
+	stp	x7, x0, [sp, #16]
+	stp	x1, x2, [sp, #32]
+	stp	x3, x4, [sp, #48]
+
+	/*
+	 * Stack depth was passed in BPF_REG_AX (x9), add it to the BPF_FP
+	 * (x25) to get the pointer to count and timestamp and pass it as the
+	 * first argument in x0.
+	 *
+	 * Before generating the call to arch_bpf_timed_may_goto, the verifier
+	 * generates a load instruction using FP, i.e. REG_AX = *(u64 *)(FP -
+	 * stack_off_cnt), so BPF_REG_FP (x25) is always set up by the arm64
+	 * jit in this case.
+	 */
+	add	x0, x9, x25
+	bl	bpf_check_timed_may_goto
+	/* BPF_REG_AX(x9) will be stored into count, so move return value to it. */
+	mov	x9, x0
+
+	/* Restore BPF registers R0 - R5 (x7, x0-x4) */
+	ldp	x7, x0, [sp, #16]
+	ldp	x1, x2, [sp, #32]
+	ldp	x3, x4, [sp, #48]
+
+	/* Restore FP and LR */
+	ldp	x29, x30, [sp], #64
+
+	ret
+SYM_FUNC_END(arch_bpf_timed_may_goto)
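A rough C model of the contract this thunk implements: the verifier keeps a {count, timestamp} pair on the BPF stack and passes its frame offset in BPF_REG_AX (x9), and the thunk preserves the BPF argument registers around the call. The struct and callee below mirror the timed may_goto support this port follows on other architectures; treat the exact shapes as assumptions rather than quotes from this page:

	/* C model of the assembly above, for illustration only. */
	struct bpf_timed_may_goto {
		u64 count;	/* remaining iteration budget */
		u64 timestamp;	/* last budget refresh time   */
	};

	u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p);

	static u64 timed_may_goto_model(u64 reg_ax /* x9 */, char *bpf_fp /* x25 */)
	{
		/* add x0, x9, x25: AX carries the offset of the pair on the
		 * BPF stack, FP points at the frame. */
		struct bpf_timed_may_goto *p =
			(struct bpf_timed_may_goto *)(bpf_fp + reg_ax);

		/* Result is returned in AX (x9); R0-R5 are saved/restored by
		 * the thunk so the surrounding program state is untouched. */
		return bpf_check_timed_may_goto(p);
	}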
arch/riscv/Kconfig
@@ -708,7 +708,6 @@ config TOOLCHAIN_HAS_ZACAS
 
 config RISCV_ISA_ZACAS
 	bool "Zacas extension support for atomic CAS"
-	depends on TOOLCHAIN_HAS_ZACAS
 	depends on RISCV_ALTERNATIVE
 	default y
 	help
arch/riscv/include/asm/cmpxchg.h
@@ -134,6 +134,7 @@
 ({									\
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
 	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
+	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
 		r = o;							\

@@ -181,6 +182,7 @@
 			     r, p, co, o, n)				\
 ({									\
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
+	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
 		r = o;							\
 									\

@@ -316,7 +318,7 @@
 	arch_cmpxchg_release((ptr), (o), (n));				\
 })
 
-#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
+#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)
 
 #define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
 
@@ -352,7 +354,7 @@ union __u128_halves {
 #define arch_cmpxchg128_local(ptr, o, n)				\
 	__arch_cmpxchg128((ptr), (o), (n), "")
 
-#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
+#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */
 
 #ifdef CONFIG_RISCV_ISA_ZAWRS
 /*
arch/riscv/kernel/setup.c
@@ -290,6 +290,7 @@ static void __init riscv_spinlock_init(void)
 
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
 	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
+	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&
 	    riscv_isa_extension_available(NULL, ZABHA) &&
 	    riscv_isa_extension_available(NULL, ZACAS)) {
 		using_ext = "using Zabha";
arch/riscv/net/bpf_jit.h
@@ -13,21 +13,15 @@
 #include <linux/filter.h>
 #include <asm/cacheflush.h>
 
+/* verify runtime detection extension status */
+#define rv_ext_enabled(ext) \
+	(IS_ENABLED(CONFIG_RISCV_ISA_##ext) && riscv_has_extension_likely(RISCV_ISA_EXT_##ext))
+
 static inline bool rvc_enabled(void)
 {
 	return IS_ENABLED(CONFIG_RISCV_ISA_C);
 }
 
-static inline bool rvzba_enabled(void)
-{
-	return IS_ENABLED(CONFIG_RISCV_ISA_ZBA) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBA);
-}
-
-static inline bool rvzbb_enabled(void)
-{
-	return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB);
-}
-
 enum {
 	RV_REG_ZERO =	0,	/* The constant value 0 */
 	RV_REG_RA =	1,	/* Return address */

@@ -84,6 +78,8 @@ struct rv_jit_context {
 	int epilogue_offset;
 	int *offset;		/* BPF to RV */
 	int nexentries;
+	int ex_insn_off;
+	int ex_jmp_off;
 	unsigned long flags;
 	int stack_size;
 	u64 arena_vm_start;

@@ -757,6 +753,17 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2)
 	return rv_css_insn(0x6, imm, rs2, 0x2);
 }
 
+/* RVZACAS instructions. */
+static inline u32 rvzacas_amocas_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x5, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rvzacas_amocas_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0x5, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
 /* RVZBA instructions. */
 static inline u32 rvzba_sh2add(u8 rd, u8 rs1, u8 rs2)
 {

@@ -1123,7 +1130,7 @@ static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
 
 static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
 {
-	if (rvzba_enabled()) {
+	if (rv_ext_enabled(ZBA)) {
 		emit(rvzba_sh2add(rd, rs1, rs2), ctx);
 		return;
 	}

@@ -1134,7 +1141,7 @@ static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx
 
 static inline void emit_sh3add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
 {
-	if (rvzba_enabled()) {
+	if (rv_ext_enabled(ZBA)) {
 		emit(rvzba_sh3add(rd, rs1, rs2), ctx);
 		return;
 	}

@@ -1184,7 +1191,7 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
 
 static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
 {
-	if (rvzbb_enabled()) {
+	if (rv_ext_enabled(ZBB)) {
 		emit(rvzbb_sextb(rd, rs), ctx);
 		return;
 	}

@@ -1195,7 +1202,7 @@ static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
 
 static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
 {
-	if (rvzbb_enabled()) {
+	if (rv_ext_enabled(ZBB)) {
 		emit(rvzbb_sexth(rd, rs), ctx);
 		return;
 	}

@@ -1211,7 +1218,7 @@ static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
 
 static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
 {
-	if (rvzbb_enabled()) {
+	if (rv_ext_enabled(ZBB)) {
 		emit(rvzbb_zexth(rd, rs), ctx);
 		return;
 	}

@@ -1222,7 +1229,7 @@ static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
 
 static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
 {
-	if (rvzba_enabled()) {
+	if (rv_ext_enabled(ZBA)) {
 		emit(rvzba_zextw(rd, rs), ctx);
 		return;
 	}

@@ -1233,7 +1240,7 @@ static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
 
 static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx)
 {
-	if (rvzbb_enabled()) {
+	if (rv_ext_enabled(ZBB)) {
 		int bits = 64 - imm;
 
 		emit(rvzbb_rev8(rd, rd), ctx);

@@ -1289,6 +1296,35 @@ static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx)
 	emit_mv(rd, RV_REG_T2, ctx);
 }
 
+static inline void emit_cmpxchg(u8 rd, u8 rs, u8 r0, bool is64, struct rv_jit_context *ctx)
+{
+	int jmp_offset;
+
+	if (rv_ext_enabled(ZACAS)) {
+		ctx->ex_insn_off = ctx->ninsns;
+		emit(is64 ? rvzacas_amocas_d(r0, rs, rd, 1, 1) :
+		     rvzacas_amocas_w(r0, rs, rd, 1, 1), ctx);
+		ctx->ex_jmp_off = ctx->ninsns;
+		if (!is64)
+			emit_zextw(r0, r0, ctx);
+		return;
+	}
+
+	if (is64)
+		emit_mv(RV_REG_T2, r0, ctx);
+	else
+		emit_addiw(RV_REG_T2, r0, 0, ctx);
+	emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
+	     rv_lr_w(r0, 0, rd, 0, 0), ctx);
+	jmp_offset = ninsns_rvoff(8);
+	emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
+	emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
+	     rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
+	jmp_offset = ninsns_rvoff(-6);
+	emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
+	emit_fence_rw_rw(ctx);
+}
+
 #endif /* __riscv_xlen == 64 */
 
 void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog);
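When Zacas is unavailable, the emit_cmpxchg() fallback above generates a classic LR/SC retry loop. The following C model describes what the emitted sequence computes in the 64-bit case; load_reserved(), store_conditional(), and full_memory_fence() are invented stand-ins for the lr.d, sc.d, and fence instructions:

	/* C model of the generated lr.d/sc.d loop; not kernel source. */
	static u64 cmpxchg_lr_sc_model(u64 *addr, u64 expected, u64 new_val)
	{
		u64 old;

		do {
			old = load_reserved(addr);	/* lr.d  r0, (rd)       */
			if (old != expected)		/* bne   t2, r0 -> done */
				break;
		} while (store_conditional(addr, new_val) != 0);
						/* sc.d t3, rs, (rd); bne t3, zero -> retry */
		full_memory_fence();			/* fence rw, rw         */
		return old;				/* old value in r0      */
	}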
arch/riscv/net/bpf_jit_comp64.c
@ -473,138 +473,92 @@ static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
|
|||
emit(hash, ctx);
|
||||
}
|
||||
|
||||
static int emit_load_8(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
static void emit_ldx_insn(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
|
||||
struct rv_jit_context *ctx)
|
||||
{
|
||||
int insns_start;
|
||||
|
||||
if (is_12b_int(off)) {
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lb(rd, off, rs), ctx);
|
||||
else
|
||||
emit(rv_lbu(rd, off, rs), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lb(rd, 0, RV_REG_T1), ctx);
|
||||
else
|
||||
emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
static int emit_load_16(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
int insns_start;
|
||||
|
||||
if (is_12b_int(off)) {
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lh(rd, off, rs), ctx);
|
||||
else
|
||||
emit(rv_lhu(rd, off, rs), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lh(rd, 0, RV_REG_T1), ctx);
|
||||
else
|
||||
emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
static int emit_load_32(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
int insns_start;
|
||||
|
||||
if (is_12b_int(off)) {
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lw(rd, off, rs), ctx);
|
||||
else
|
||||
emit(rv_lwu(rd, off, rs), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
|
||||
insns_start = ctx->ninsns;
|
||||
if (sign_ext)
|
||||
emit(rv_lw(rd, 0, RV_REG_T1), ctx);
|
||||
else
|
||||
emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
}
|
||||
|
||||
static int emit_load_64(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
int insns_start;
|
||||
|
||||
if (is_12b_int(off)) {
|
||||
insns_start = ctx->ninsns;
|
||||
switch (size) {
|
||||
case BPF_B:
|
||||
emit(sign_ext ? rv_lb(rd, off, rs) : rv_lbu(rd, off, rs), ctx);
|
||||
break;
|
||||
case BPF_H:
|
||||
emit(sign_ext ? rv_lh(rd, off, rs) : rv_lhu(rd, off, rs), ctx);
|
||||
break;
|
||||
case BPF_W:
|
||||
emit(sign_ext ? rv_lw(rd, off, rs) : rv_lwu(rd, off, rs), ctx);
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit_ld(rd, off, rs, ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void emit_stx_insn(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
|
||||
{
|
||||
switch (size) {
|
||||
case BPF_B:
|
||||
emit(rv_sb(rd, off, rs), ctx);
|
||||
break;
|
||||
case BPF_H:
|
||||
emit(rv_sh(rd, off, rs), ctx);
|
||||
break;
|
||||
case BPF_W:
|
||||
emit_sw(rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit_sd(rd, off, rs, ctx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void emit_ldx(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
|
||||
struct rv_jit_context *ctx)
|
||||
{
|
||||
if (is_12b_int(off)) {
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_ldx_insn(rd, off, rs, size, sign_ext, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
|
||||
insns_start = ctx->ninsns;
|
||||
emit_ld(rd, 0, RV_REG_T1, ctx);
|
||||
return ctx->ninsns - insns_start;
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_ldx_insn(rd, 0, RV_REG_T1, size, sign_ext, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
}
|
||||
|
||||
static void emit_store_8(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
static void emit_st(u8 rd, s16 off, s32 imm, u8 size, struct rv_jit_context *ctx)
|
||||
{
|
||||
emit_imm(RV_REG_T1, imm, ctx);
|
||||
if (is_12b_int(off)) {
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_stx_insn(rd, off, RV_REG_T1, size, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T2, off, ctx);
|
||||
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_stx_insn(RV_REG_T2, 0, RV_REG_T1, size, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
}
|
||||
|
||||
static void emit_stx(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
|
||||
{
|
||||
if (is_12b_int(off)) {
|
||||
emit(rv_sb(rd, off, rs), ctx);
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_stx_insn(rd, off, rs, size, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
|
||||
emit(rv_sb(RV_REG_T1, 0, rs), ctx);
|
||||
}
|
||||
|
||||
static void emit_store_16(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
if (is_12b_int(off)) {
|
||||
emit(rv_sh(rd, off, rs), ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
|
||||
emit(rv_sh(RV_REG_T1, 0, rs), ctx);
|
||||
}
|
||||
|
||||
static void emit_store_32(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
if (is_12b_int(off)) {
|
||||
emit_sw(rd, off, rs, ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
|
||||
emit_sw(RV_REG_T1, 0, rs, ctx);
|
||||
}
|
||||
|
||||
static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
|
||||
{
|
||||
if (is_12b_int(off)) {
|
||||
emit_sd(rd, off, rs, ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
emit_imm(RV_REG_T1, off, ctx);
|
||||
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
|
||||
emit_sd(RV_REG_T1, 0, rs, ctx);
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit_stx_insn(RV_REG_T1, 0, rs, size, ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
}
|
||||
|
||||
static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
|
||||
|
|
@ -617,20 +571,12 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
|
|||
switch (imm) {
|
||||
/* dst_reg = load_acquire(src_reg + off16) */
|
||||
case BPF_LOAD_ACQ:
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_B:
|
||||
emit_load_8(false, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_H:
|
||||
emit_load_16(false, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_W:
|
||||
emit_load_32(false, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit_load_64(false, rd, off, rs, ctx);
|
||||
break;
|
||||
if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
|
||||
emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx);
|
||||
rs = RV_REG_T2;
|
||||
}
|
||||
|
||||
emit_ldx(rd, off, rs, BPF_SIZE(code), false, ctx);
|
||||
emit_fence_r_rw(ctx);
|
||||
|
||||
/* If our next insn is a redundant zext, return 1 to tell
|
||||
|
|
@ -641,21 +587,13 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
|
|||
break;
|
||||
/* store_release(dst_reg + off16, src_reg) */
|
||||
case BPF_STORE_REL:
|
||||
emit_fence_rw_w(ctx);
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_B:
|
||||
emit_store_8(rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_H:
|
||||
emit_store_16(rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_W:
|
||||
emit_store_32(rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_DW:
|
||||
emit_store_64(rd, off, rs, ctx);
|
||||
break;
|
||||
if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
|
||||
emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
|
||||
rd = RV_REG_T2;
|
||||
}
|
||||
|
||||
emit_fence_rw_w(ctx);
|
||||
emit_stx(rd, off, rs, BPF_SIZE(code), ctx);
|
||||
break;
|
||||
default:
|
||||
pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
|
||||
|
|
@ -668,17 +606,15 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
|
|||
static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
|
||||
struct rv_jit_context *ctx)
|
||||
{
|
||||
u8 r0, code = insn->code;
|
||||
u8 code = insn->code;
|
||||
s16 off = insn->off;
|
||||
s32 imm = insn->imm;
|
||||
int jmp_offset;
|
||||
bool is64;
|
||||
bool is64 = BPF_SIZE(code) == BPF_DW;
|
||||
|
||||
if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
|
||||
pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
is64 = BPF_SIZE(code) == BPF_DW;
|
||||
|
||||
if (off) {
|
||||
if (is_12b_int(off)) {
|
||||
|
|
@ -690,72 +626,82 @@ static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
|
|||
rd = RV_REG_T1;
|
||||
}
|
||||
|
||||
if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
|
||||
emit_add(RV_REG_T1, rd, RV_REG_ARENA, ctx);
|
||||
rd = RV_REG_T1;
|
||||
}
|
||||
|
||||
switch (imm) {
|
||||
/* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
|
||||
case BPF_ADD:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
|
||||
rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
break;
|
||||
case BPF_AND:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
|
||||
rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
break;
|
||||
case BPF_OR:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
|
||||
rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
break;
|
||||
case BPF_XOR:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
|
||||
rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
break;
|
||||
/* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
|
||||
case BPF_ADD | BPF_FETCH:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
|
||||
rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
if (!is64)
|
||||
emit_zextw(rs, rs, ctx);
|
||||
break;
|
||||
case BPF_AND | BPF_FETCH:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
|
||||
rv_amoand_w(rs, rs, rd, 1, 1), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
if (!is64)
|
||||
emit_zextw(rs, rs, ctx);
|
||||
break;
|
||||
case BPF_OR | BPF_FETCH:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
|
||||
rv_amoor_w(rs, rs, rd, 1, 1), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
if (!is64)
|
||||
emit_zextw(rs, rs, ctx);
|
||||
break;
|
||||
case BPF_XOR | BPF_FETCH:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
|
||||
rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
if (!is64)
|
||||
emit_zextw(rs, rs, ctx);
|
||||
break;
|
||||
/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
|
||||
case BPF_XCHG:
|
||||
ctx->ex_insn_off = ctx->ninsns;
|
||||
emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
|
||||
rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
|
||||
ctx->ex_jmp_off = ctx->ninsns;
|
||||
if (!is64)
|
||||
emit_zextw(rs, rs, ctx);
|
||||
break;
|
||||
/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
|
||||
case BPF_CMPXCHG:
|
||||
r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
|
||||
if (is64)
|
||||
emit_mv(RV_REG_T2, r0, ctx);
|
||||
else
|
||||
emit_addiw(RV_REG_T2, r0, 0, ctx);
|
||||
emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
|
||||
rv_lr_w(r0, 0, rd, 0, 0), ctx);
|
||||
jmp_offset = ninsns_rvoff(8);
|
||||
emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
|
||||
emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
|
||||
rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
|
||||
jmp_offset = ninsns_rvoff(-6);
|
||||
emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
|
||||
emit_fence_rw_rw(ctx);
|
||||
emit_cmpxchg(rd, rs, regmap[BPF_REG_0], is64, ctx);
|
||||
break;
|
||||
default:
|
||||
pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
|
||||
|
|
@ -765,6 +711,39 @@ static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sign-extend the register if necessary
|
||||
*/
|
||||
static int sign_extend(u8 rd, u8 rs, u8 sz, bool sign, struct rv_jit_context *ctx)
|
||||
{
|
||||
if (!sign && (sz == 1 || sz == 2)) {
|
||||
if (rd != rs)
|
||||
emit_mv(rd, rs, ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (sz) {
|
||||
case 1:
|
||||
emit_sextb(rd, rs, ctx);
|
||||
break;
|
||||
case 2:
|
||||
emit_sexth(rd, rs, ctx);
|
||||
break;
|
||||
case 4:
|
||||
emit_sextw(rd, rs, ctx);
|
||||
break;
|
||||
case 8:
|
||||
if (rd != rs)
|
||||
emit_mv(rd, rs, ctx);
|
||||
break;
|
||||
default:
|
||||
pr_err("bpf-jit: invalid size %d for sign_extend\n", sz);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
|
||||
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
|
||||
#define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */
|
||||
|
|
@ -783,9 +762,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
|
|||
}
|
||||
|
||||
/* For accesses to BTF pointers, add an entry to the exception table */
|
||||
static int add_exception_handler(const struct bpf_insn *insn,
|
||||
struct rv_jit_context *ctx,
|
||||
int dst_reg, int insn_len)
|
||||
static int add_exception_handler(const struct bpf_insn *insn, int dst_reg,
|
||||
struct rv_jit_context *ctx)
|
||||
{
|
||||
struct exception_table_entry *ex;
|
||||
unsigned long pc;
|
||||
|
|
@ -793,21 +771,23 @@ static int add_exception_handler(const struct bpf_insn *insn,
|
|||
off_t fixup_offset;
|
||||
|
||||
if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
|
||||
(BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
|
||||
BPF_MODE(insn->code) != BPF_PROBE_MEM32))
|
||||
ctx->ex_insn_off <= 0 || ctx->ex_jmp_off <= 0)
|
||||
return 0;
|
||||
|
||||
if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
|
||||
BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
|
||||
BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
|
||||
BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
|
||||
return 0;
|
||||
|
||||
if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
|
||||
return -EINVAL;
|
||||
|
||||
if (WARN_ON_ONCE(insn_len > ctx->ninsns))
|
||||
return -EINVAL;
|
||||
|
||||
if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
|
||||
if (WARN_ON_ONCE(ctx->ex_insn_off > ctx->ninsns || ctx->ex_jmp_off > ctx->ninsns))
|
||||
return -EINVAL;
|
||||
|
||||
ex = &ctx->prog->aux->extable[ctx->nexentries];
|
||||
pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
|
||||
pc = (unsigned long)&ctx->ro_insns[ctx->ex_insn_off];
|
||||
|
||||
/*
|
||||
* This is the relative offset of the instruction that may fault from
|
||||
|
|
@ -831,7 +811,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
|
|||
* that may fault. The execution will jump to this after handling the
|
||||
* fault.
|
||||
*/
|
||||
fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
|
||||
fixup_offset = (long)&ex->fixup - (long)&ctx->ro_insns[ctx->ex_jmp_off];
|
||||
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
|
||||
return -ERANGE;
|
||||
|
||||
|
|
@ -848,6 +828,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
|
|||
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
|
||||
ex->type = EX_TYPE_BPF;
|
||||
|
||||
ctx->ex_insn_off = 0;
|
||||
ctx->ex_jmp_off = 0;
|
||||
ctx->nexentries++;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1079,10 +1061,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
|||
stack_size += 16;
|
||||
|
||||
save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
|
||||
if (save_ret) {
|
||||
if (save_ret)
|
||||
stack_size += 16; /* Save both A5 (BPF R0) and A0 */
|
||||
retval_off = stack_size;
|
||||
}
|
||||
retval_off = stack_size;
|
||||
|
||||
stack_size += nr_arg_slots * 8;
|
||||
args_off = stack_size;
|
||||
|
|
@ -1226,8 +1207,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
|||
restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
|
||||
|
||||
if (save_ret) {
|
||||
emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
|
||||
emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
|
||||
if (is_struct_ops) {
|
||||
ret = sign_extend(RV_REG_A0, regmap[BPF_REG_0], m->ret_size,
|
||||
m->ret_flags & BTF_FMODEL_SIGNED_ARG, ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
|
||||
}
|
||||
}
|
||||
|
||||
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
|
||||
|
|
@ -1320,7 +1308,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
|
|||
goto out;
|
||||
}
|
||||
|
||||
bpf_flush_icache(ro_image, ro_image_end);
|
||||
out:
|
||||
kvfree(image);
|
||||
return ret < 0 ? ret : size;
|
||||
|
|
@ -1857,7 +1844,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
|
|||
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
|
||||
{
|
||||
bool sign_ext;
|
||||
int insn_len;
|
||||
|
||||
sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
|
||||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
|
||||
|
|
@ -1867,22 +1853,9 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
|
|||
rs = RV_REG_T2;
|
||||
}
|
||||
|
||||
switch (BPF_SIZE(code)) {
|
||||
case BPF_B:
|
||||
insn_len = emit_load_8(sign_ext, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_H:
|
||||
insn_len = emit_load_16(sign_ext, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_W:
|
||||
insn_len = emit_load_32(sign_ext, rd, off, rs, ctx);
|
||||
break;
|
||||
case BPF_DW:
|
||||
insn_len = emit_load_64(sign_ext, rd, off, rs, ctx);
|
||||
break;
|
||||
}
|
||||
emit_ldx(rd, off, rs, BPF_SIZE(code), sign_ext, ctx);
|
||||
|
||||
ret = add_exception_handler(insn, ctx, rd, insn_len);
|
||||
ret = add_exception_handler(insn, rd, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
@@ -1890,238 +1863,73 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
			return 1;
		break;
	}

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sb(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;

	case BPF_ST | BPF_MEM | BPF_H:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sh(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_W:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sw(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_DW:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sd(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;

	/* ST | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = imm */
	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
	{
		int insn_len, insns_start;

		emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
		rd = RV_REG_T3;

		/* Load imm to a register then store it */
		emit_imm(RV_REG_T1, imm, ctx);

		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit(rv_sb(rd, off, RV_REG_T1), ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T2, off, ctx);
			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
			insns_start = ctx->ninsns;
			emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_H:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit(rv_sh(rd, off, RV_REG_T1), ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T2, off, ctx);
			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
			insns_start = ctx->ninsns;
			emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_W:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit_sw(rd, off, RV_REG_T1, ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T2, off, ctx);
			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
			insns_start = ctx->ninsns;
			emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_DW:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit_sd(rd, off, RV_REG_T1, ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T2, off, ctx);
			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
			insns_start = ctx->ninsns;
			emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
			rd = RV_REG_T3;
		}

		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
					    insn_len);
		emit_st(rd, off, imm, BPF_SIZE(code), ctx);

		ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;

		break;
	}

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
		emit_store_8(rd, off, rs, ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_H:
		emit_store_16(rd, off, rs, ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_W:
		emit_store_32(rd, off, rs, ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_DW:
		emit_store_64(rd, off, rs, ctx);
		break;
	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			ret = emit_atomic_ld_st(rd, rs, insn, ctx);
		else
			ret = emit_atomic_rmw(rd, rs, insn, ctx);
		if (ret)
			return ret;
		break;

	/* STX | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = src */
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
	{
		int insn_len, insns_start;

		emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
		rd = RV_REG_T2;

		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit(rv_sb(rd, off, rs), ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T1, off, ctx);
			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			insns_start = ctx->ninsns;
			emit(rv_sb(RV_REG_T1, 0, rs), ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_H:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit(rv_sh(rd, off, rs), ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T1, off, ctx);
			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			insns_start = ctx->ninsns;
			emit(rv_sh(RV_REG_T1, 0, rs), ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_W:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit_sw(rd, off, rs, ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T1, off, ctx);
			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			insns_start = ctx->ninsns;
			emit_sw(RV_REG_T1, 0, rs, ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		case BPF_DW:
			if (is_12b_int(off)) {
				insns_start = ctx->ninsns;
				emit_sd(rd, off, rs, ctx);
				insn_len = ctx->ninsns - insns_start;
				break;
			}

			emit_imm(RV_REG_T1, off, ctx);
			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			insns_start = ctx->ninsns;
			emit_sd(RV_REG_T1, 0, rs, ctx);
			insn_len = ctx->ninsns - insns_start;
			break;
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
			rd = RV_REG_T2;
		}

		ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
					    insn_len);
		emit_stx(rd, off, rs, BPF_SIZE(code), ctx);

		ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;
		break;

	/* Atomics */
	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			ret = emit_atomic_ld_st(rd, rs, insn, ctx);
		else
			ret = emit_atomic_rmw(rd, rs, insn, ctx);

		ret = ret ?: add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
		if (ret)
			return ret;

		break;
	}

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
@@ -2249,6 +2057,25 @@ bool bpf_jit_supports_arena(void)
	return true;
}

bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	if (in_arena) {
		switch (insn->code) {
		case BPF_STX | BPF_ATOMIC | BPF_W:
		case BPF_STX | BPF_ATOMIC | BPF_DW:
			if (insn->imm == BPF_CMPXCHG)
				return rv_ext_enabled(ZACAS);
			break;
		case BPF_LDX | BPF_MEMSX | BPF_B:
		case BPF_LDX | BPF_MEMSX | BPF_H:
		case BPF_LDX | BPF_MEMSX | BPF_W:
			return false;
		}
	}

	return true;
}

bool bpf_jit_supports_percpu_insn(void)
{
	return true;
@@ -2,5 +2,5 @@
#
# Arch-specific network modules
#
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
obj-$(CONFIG_HAVE_PNETID) += pnet.o
@@ -674,20 +674,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
	_EMIT2(0x07f0 | reg);					\
} while (0)

/*
 * Call r1 either directly or via __s390_indirect_jump_r1 thunk
 */
static void call_r1(struct bpf_jit *jit)
{
	if (nospec_uses_trampoline())
		/* brasl %r14,__s390_indirect_jump_r1 */
		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
				     __s390_indirect_jump_r1);
	else
		/* basr %r14,%r1 */
		EMIT2(0x0d00, REG_14, REG_1);
}

/*
 * Function epilogue
 */
@@ -1790,20 +1776,21 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,

		REG_SET_SEEN(BPF_REG_5);
		jit->seen |= SEEN_FUNC;

		/*
		 * Copy the tail call counter to where the callee expects it.
		 *
		 * Note 1: The callee can increment the tail call counter, but
		 * we do not load it back, since the x86 JIT does not do this
		 * either.
		 *
		 * Note 2: We assume that the verifier does not let us call the
		 * main program, which clears the tail call counter on entry.
		 */
		/* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
		       0xf000 | (jit->frame_off +
				 offsetof(struct prog_frame, tail_call_cnt)));

		if (insn->src_reg == BPF_PSEUDO_CALL)
			/*
			 * mvc tail_call_cnt(4,%r15),
			 *     frame_off+tail_call_cnt(%r15)
			 */
			_EMIT6(0xd203f000 | offsetof(struct prog_frame,
						     tail_call_cnt),
			       0xf000 | (jit->frame_off +
					 offsetof(struct prog_frame,
						  tail_call_cnt)));

		/* Sign-extend the kfunc arguments. */
		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {

@@ -1819,12 +1806,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
			}
		}

		/* lgrl %w1,func */
		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
		/* %r1() */
		call_r1(jit);
		/* lgr %b0,%r2: load return value into %b0 */
		EMIT4(0xb9040000, BPF_REG_0, REG_2);
		if ((void *)func == arch_bpf_timed_may_goto) {
			/*
			 * arch_bpf_timed_may_goto() has a special ABI: the
			 * parameters are in BPF_REG_AX and BPF_REG_10; the
			 * return value is in BPF_REG_AX; and all GPRs except
			 * REG_W0, REG_W1, and BPF_REG_AX are callee-saved.
			 */

			/* brasl %r0,func */
			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_0, (void *)func);
		} else {
			/* brasl %r14,func */
			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
			/* lgr %b0,%r2: load return value into %b0 */
			EMIT4(0xb9040000, BPF_REG_0, REG_2);
		}

		/*
		 * Copy the potentially updated tail call counter back.
		 */

		if (insn->src_reg == BPF_PSEUDO_CALL)
			/*
			 * mvc frame_off+tail_call_cnt(%r15),
			 *     tail_call_cnt(4,%r15)
			 */
			_EMIT6(0xd203f000 | (jit->frame_off +
					     offsetof(struct prog_frame,
						      tail_call_cnt)),
			       0xf000 | offsetof(struct prog_frame,
						 tail_call_cnt));

		break;
	}
	case BPF_JMP | BPF_TAIL_CALL: {
@@ -2517,14 +2530,12 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
	 *         goto skip;
	 */

	/* %r1 = __bpf_prog_enter */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* la %r3,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);
	/* brasl %r14,__bpf_prog_enter */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
	/* ltgr %r7,%r2 */
	EMIT4(0xb9020000, REG_7, REG_2);
	/* brcl 8,skip */
@@ -2535,15 +2546,13 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
	 * retval = bpf_func(args, p->insnsi);
	 */

	/* %r1 = p->bpf_func */
	load_imm64(jit, REG_1, (u64)p->bpf_func);
	/* la %r2,bpf_args_off(%r15) */
	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
	/* %r3 = p->insnsi */
	if (!p->jited)
		load_imm64(jit, REG_3, (u64)p->insnsi);
	/* %r1() */
	call_r1(jit);
	/* brasl %r14,p->bpf_func */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
	/* stg %r2,retval_off(%r15) */
	if (save_ret) {
		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
@@ -2560,16 +2569,14 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
	 * __bpf_prog_exit(p, start, &run_ctx);
	 */

	/* %r1 = __bpf_prog_exit */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* lgr %r3,%r7 */
	EMIT4(0xb9040000, REG_3, REG_7);
	/* la %r4,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);
	/* brasl %r14,__bpf_prog_exit */
	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));

	return 0;
}
@@ -2729,9 +2736,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,

		/* lgr %r8,%r0 */
		EMIT4(0xb9040000, REG_8, REG_0);
	} else {
		/* %r8 = func_addr + S390X_PATCH_SIZE */
		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
	}

	/*
@@ -2757,12 +2761,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
		 * __bpf_tramp_enter(im);
		 */

		/* %r1 = __bpf_tramp_enter */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
		/* brasl %r14,__bpf_tramp_enter */
		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
	}

	for (i = 0; i < fentry->nr_links; i++)
@@ -2815,13 +2817,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
		/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
		       0xf000 | tjit->tccnt_off);
		/* lgr %r1,%r8 */
		EMIT4(0xb9040000, REG_1, REG_8);
		/* %r1() */
		call_r1(jit);
		if (flags & BPF_TRAMP_F_ORIG_STACK) {
			if (nospec_uses_trampoline())
				/* brasl %r14,__s390_indirect_jump_r8 */
				EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
						     __s390_indirect_jump_r8);
			else
				/* basr %r14,%r8 */
				EMIT2(0x0d00, REG_14, REG_8);
		} else {
			/* brasl %r14,func_addr+S390X_PATCH_SIZE */
			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
					     func_addr + S390X_PATCH_SIZE);
		}
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);
		/* mvc tccnt_off(%r15),tail_call_cnt(4,%r15) */
		_EMIT6(0xd203f000 | tjit->tccnt_off,
		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));

		im->ip_after_call = jit->prg_buf + jit->prg;
@@ -2846,12 +2860,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
		 * __bpf_tramp_exit(im);
		 */

		/* %r1 = __bpf_tramp_exit */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
		/* brasl %r14,__bpf_tramp_exit */
		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
	}

	/* lmg %r2,%rN,reg_args_off(%r15) */
@@ -2860,7 +2872,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/* lgr %r1,%r8 */
	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
	if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
	    (flags & BPF_TRAMP_F_ORIG_STACK))
		EMIT4(0xb9040000, REG_1, REG_8);
	/* lmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
@@ -2879,9 +2892,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		EMIT_JUMP_REG(14);
	else
	else if (flags & BPF_TRAMP_F_ORIG_STACK)
		EMIT_JUMP_REG(1);

	else
		/* brcl 0xf,func_addr+S390X_PATCH_SIZE */
		EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
				     func_addr + S390X_PATCH_SIZE);
	return 0;
}
@@ -2951,6 +2967,11 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			return false;
		break;
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
		return false;
	}
	return true;
}
@@ -2989,3 +3010,8 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
		prev_addr = addr;
	}
}

bool bpf_jit_supports_timed_may_goto(void)
{
	return true;
}

arch/s390/net/bpf_timed_may_goto.S (new file, 45 lines)
@@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>

#define R2_OFF 0
#define R5_OFF (R2_OFF + (5 - 2 + 1) * 8)
#define R14_OFF (R5_OFF + 8)
#define RETADDR_OFF (R14_OFF + 8)
#define R15_OFF (RETADDR_OFF + 8)
#define BACKCHAIN_OFF (R15_OFF + 8)
#define FRAME_SIZE (BACKCHAIN_OFF + 8)
#define FRAME_OFF (STACK_FRAME_OVERHEAD - FRAME_SIZE)
#if (FRAME_OFF + BACKCHAIN_OFF) != __SF_BACKCHAIN
#error Stack frame layout calculation is broken
#endif

	GEN_BR_THUNK %r1

SYM_FUNC_START(arch_bpf_timed_may_goto)
	/*
	 * This function has a special ABI: the parameters are in %r12 and
	 * %r13; the return value is in %r12; all GPRs except %r0, %r1, and
	 * %r12 are callee-saved; and the return address is in %r0.
	 */
	stmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
	stg %r14,FRAME_OFF+R14_OFF(%r15)
	stg %r0,FRAME_OFF+RETADDR_OFF(%r15)
	stg %r15,FRAME_OFF+R15_OFF(%r15)
	lgr %r1,%r15
	lay %r15,-FRAME_SIZE(%r15)
	stg %r1,__SF_BACKCHAIN(%r15)

	lay %r2,0(%r12,%r13)
	brasl %r14,bpf_check_timed_may_goto
	lgr %r12,%r2

	lg %r15,FRAME_SIZE+FRAME_OFF+R15_OFF(%r15)
	lmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
	lg %r14,FRAME_OFF+R14_OFF(%r15)
	lg %r1,FRAME_OFF+RETADDR_OFF(%r15)
	BR_EX %r1
SYM_FUNC_END(arch_bpf_timed_may_goto)
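The stub above only saves and restores registers around a call to the generic bpf_check_timed_may_goto(). As a rough stand-alone model of the contract it relies on, a count/timestamp pair kept on the BPF stack that the helper refreshes until a time budget runs out, consider the sketch below; the struct layout, the 8192 refill value, and the 250 us deadline are illustrative assumptions, not the kernel's exact values:

#include <stdint.h>
#include <time.h>

struct timed_may_goto_state {
	uint64_t count;     /* remaining iterations before the next time check */
	uint64_t timestamp; /* ns timestamp taken at the first check */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Returns a refreshed count, or 0 to tell the program to stop looping. */
uint64_t check_timed_may_goto(struct timed_may_goto_state *s)
{
	uint64_t t = now_ns();

	if (!s->timestamp) {
		s->timestamp = t;
		return s->count = 8192;	/* assumed refill budget */
	}
	if (t - s->timestamp > 250000)	/* assumed 250 us deadline */
		return 0;
	return s->count = 8192;
}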
@@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
@@ -1151,11 +1152,38 @@ static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
	*pprog = prog;
}

static void emit_ldsx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
{
	u8 *prog = *pprog;

	switch (size) {
	case BPF_B:
		/* movsx rax, byte ptr [rax + r12 + off] */
		EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBE);
		break;
	case BPF_H:
		/* movsx rax, word ptr [rax + r12 + off] */
		EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBF);
		break;
	case BPF_W:
		/* movsx rax, dword ptr [rax + r12 + off] */
		EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x63);
		break;
	}
	emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
	*pprog = prog;
}

static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
}

static void emit_ldsx_r12(u8 **prog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	emit_ldsx_index(prog, size, dst_reg, src_reg, X86_REG_R12, off);
}

/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1388,16 +1416,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
	return 0;
}

/*
 * Metadata encoding for exception handling in JITed code.
 *
 * Format of `fixup` and `data` fields in `struct exception_table_entry`:
 *
 * Bit layout of `fixup` (32-bit):
 *
 * +-----------+--------+-----------+---------+----------+
 * |    31     | 30-24  |   23-16   |  15-8   |   7-0    |
 * |           |        |           |         |          |
 * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
 * +-----------+--------+-----------+---------+----------+
 *
 * - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
 * - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)).
 *   This is set to DONT_CLEAR if the insn is a store.
 * - ARENA_REG (8 bits): Offset of the register that is used to calculate the
 *   address for load/store when accessing the arena region.
 * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
 *
 * Bit layout of `data` (32-bit):
 *
 * +--------------+--------+--------------+
 * |    31-16     |  15-8  |     7-0      |
 * |              |        |              |
 * | ARENA_OFFSET | Unused | EX_TYPE_BPF  |
 * +--------------+--------+--------------+
 *
 * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
 *   accessing the arena region.
 */

#define DONT_CLEAR 1
#define FIXUP_INSN_LEN_MASK	GENMASK(7, 0)
#define FIXUP_REG_MASK		GENMASK(15, 8)
#define FIXUP_ARENA_REG_MASK	GENMASK(23, 16)
#define FIXUP_ARENA_ACCESS	BIT(31)
#define DATA_ARENA_OFFSET_MASK	GENMASK(31, 16)

bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
	u32 reg = x->fixup >> 8;
	u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
	u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
	bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
	bool is_write = (reg == DONT_CLEAR);
	unsigned long addr;
	s16 off;
	u32 arena_reg;

	if (is_arena) {
		arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
		off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
		addr = *(unsigned long *)((void *)regs + arena_reg) + off;
		bpf_prog_report_arena_violation(is_write, addr, regs->ip);
	}

	/* jump over faulting load and clear dest register */
	if (reg != DONT_CLEAR)
		*(unsigned long *)((void *)regs + reg) = 0;
	regs->ip += x->fixup & 0xff;
	regs->ip += insn_len;

	return true;
}
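The comment block above fully determines how the fixup word is packed and unpacked. As a quick stand-alone demonstration of that bit layout, using plain shifts in place of the kernel's FIELD_PREP()/FIELD_GET() helpers; the offsets 4, 40, and 112 below are made-up example values:

#include <stdint.h>
#include <stdio.h>

#define FIXUP_INSN_LEN_SHIFT	0
#define FIXUP_REG_SHIFT		8
#define FIXUP_ARENA_REG_SHIFT	16
#define FIXUP_ARENA_ACCESS	(1u << 31)

int main(void)
{
	/* pack: 4-byte insn, clear the reg at pt_regs offset 40,
	 * arena address taken from the reg at pt_regs offset 112 */
	uint32_t fixup = (4u << FIXUP_INSN_LEN_SHIFT) |
			 (40u << FIXUP_REG_SHIFT) |
			 (112u << FIXUP_ARENA_REG_SHIFT) |
			 FIXUP_ARENA_ACCESS;

	/* unpack the same fields the way ex_handler_bpf() does */
	printf("insn_len=%u reg=%u arena_reg=%u arena=%u\n",
	       fixup & 0xff, (fixup >> 8) & 0xff,
	       (fixup >> 16) & 0xff, !!(fixup & FIXUP_ARENA_ACCESS));
	return 0;
}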
@@ -2057,19 +2136,27 @@ st:			if (is_imm8(insn->off))
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
		start_of_ldx = prog;
		if (BPF_CLASS(insn->code) == BPF_LDX)
			emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
		else
		if (BPF_CLASS(insn->code) == BPF_LDX) {
			if (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX)
				emit_ldsx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
			else
				emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
		} else {
			emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
		}
populate_extable:
		{
			struct exception_table_entry *ex;
			u8 *_insn = image + proglen + (start_of_ldx - temp);
			u32 arena_reg, fixup_reg;
			s64 delta;

			if (!bpf_prog->aux->extable)
@@ -2089,8 +2176,29 @@ st:			if (is_imm8(insn->off))

			ex->data = EX_TYPE_BPF;

			ex->fixup = (prog - start_of_ldx) |
				((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
			/*
			 * src_reg/dst_reg holds the address in the arena region with upper
			 * 32-bits being zero because of a preceding addr_space_cast(r<n>,
			 * 0x0, 0x1) instruction. This address is adjusted with the addition
			 * of arena_vm_start (see the implementation of BPF_PROBE_MEM32 and
			 * BPF_PROBE_ATOMIC) before being used for the memory access. Pass
			 * the reg holding the unmodified 32-bit address to
			 * ex_handler_bpf().
			 */
			if (BPF_CLASS(insn->code) == BPF_LDX) {
				arena_reg = reg2pt_regs[src_reg];
				fixup_reg = reg2pt_regs[dst_reg];
			} else {
				arena_reg = reg2pt_regs[dst_reg];
				fixup_reg = DONT_CLEAR;
			}

			ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
				    FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
				    FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
			ex->fixup |= FIXUP_ARENA_ACCESS;

			ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
		}
		break;

@@ -2208,7 +2316,8 @@ st:			if (is_imm8(insn->off))
			 * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
			 * of 4 bytes will be ignored and rbx will be zero inited.
			 */
			ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
			ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
				    FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
		}
		break;
@@ -429,6 +429,7 @@ int pkcs7_verify(struct pkcs7_message *pkcs7,
		/* Authattr presence checked in parser */
		break;
	case VERIFYING_UNSPECIFIED_SIGNATURE:
	case VERIFYING_BPF_SIGNATURE:
		if (pkcs7->data_type != OID_data) {
			pr_warn("Invalid unspecified sig (not pkcs7-data)\n");
			return -EKEYREJECTED;
@@ -7,6 +7,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/filter.h>

#include <crypto/sha2.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>

@@ -109,6 +110,7 @@ struct bpf_map_ops {
	long (*map_pop_elem)(struct bpf_map *map, void *value);
	long (*map_peek_elem)(struct bpf_map *map, void *value);
	void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
	int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf);

	/* funcs called by prog_array and perf_event_array map */
	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,

@@ -206,6 +208,7 @@ enum btf_field_type {
	BPF_WORKQUEUE  = (1 << 10),
	BPF_UPTR       = (1 << 11),
	BPF_RES_SPIN_LOCK = (1 << 12),
	BPF_TASK_WORK  = (1 << 13),
};

enum bpf_cgroup_storage_type {

@@ -259,6 +262,7 @@ struct btf_record {
	int timer_off;
	int wq_off;
	int refcount_off;
	int task_work_off;
	struct btf_field fields[];
};

@@ -285,9 +289,11 @@ struct bpf_map_owner {
	bool xdp_has_frags;
	u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
	const struct btf_type *attach_func_proto;
	enum bpf_attach_type expected_attach_type;
};

struct bpf_map {
	u8 sha[SHA256_DIGEST_SIZE];
	const struct bpf_map_ops *ops;
	struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY

@@ -328,6 +334,7 @@ struct bpf_map {
	atomic64_t sleepable_refcnt;
	s64 __percpu *elem_count;
	u64 cookie; /* write-once */
	char *excl_prog_sha;
};

static inline const char *btf_field_type_name(enum btf_field_type type)

@@ -358,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
		return "bpf_rb_node";
	case BPF_REFCOUNT:
		return "bpf_refcount";
	case BPF_TASK_WORK:
		return "bpf_task_work";
	default:
		WARN_ON_ONCE(1);
		return "unknown";

@@ -396,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
		return sizeof(struct bpf_rb_node);
	case BPF_REFCOUNT:
		return sizeof(struct bpf_refcount);
	case BPF_TASK_WORK:
		return sizeof(struct bpf_task_work);
	default:
		WARN_ON_ONCE(1);
		return 0;

@@ -428,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
		return __alignof__(struct bpf_rb_node);
	case BPF_REFCOUNT:
		return __alignof__(struct bpf_refcount);
	case BPF_TASK_WORK:
		return __alignof__(struct bpf_task_work);
	default:
		WARN_ON_ONCE(1);
		return 0;

@@ -459,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
	case BPF_KPTR_REF:
	case BPF_KPTR_PERCPU:
	case BPF_UPTR:
	case BPF_TASK_WORK:
		break;
	default:
		WARN_ON_ONCE(1);

@@ -595,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
void bpf_wq_cancel_and_free(void *timer);
void bpf_task_work_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
			struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,

@@ -767,12 +782,15 @@ enum bpf_type_flag {
	 */
	MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),

	/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
	DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_FLAG_MAX,
	__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
			       | DYNPTR_TYPE_XDP)
			       | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)

@@ -1110,7 +1128,7 @@ struct bpf_prog_offload {
 */
#define MAX_BPF_FUNC_REG_ARGS 5

/* The argument is a structure. */
/* The argument is a structure or a union. */
#define BTF_FMODEL_STRUCT_ARG BIT(0)

/* The argument is signed. */

@@ -1358,6 +1376,8 @@ enum bpf_dynptr_type {
	BPF_DYNPTR_TYPE_SKB,
	/* Underlying data is a xdp_buff */
	BPF_DYNPTR_TYPE_XDP,
	/* Points to skb_metadata_end()-skb_metadata_len() */
	BPF_DYNPTR_TYPE_SKB_META,
};
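The new SKB_META dynptr type backs the "reading skb metadata via bpf_dynptr" feature from this pull. A minimal sketch of how a tc program might consume it follows; the kfunc name bpf_dynptr_from_skb_meta() and its flags argument are my reading of the series and should be treated as assumptions:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

#define TC_ACT_OK	0
#define TC_ACT_SHOT	2

/* Assumed kfunc from this series; the real signature may differ. */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int read_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 val = 0;

	/* Get a dynptr over the metadata area in front of the packet. */
	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return TC_ACT_OK;

	/* Read the first 4 bytes of metadata, if any were set by XDP. */
	bpf_dynptr_read(&val, sizeof(val), &meta, 0, 0);

	return val ? TC_ACT_SHOT : TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";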

int bpf_dynptr_check_size(u32 size);

@@ -1619,6 +1639,7 @@ struct bpf_prog_aux {
	bool priv_stack_requested;
	bool changes_pkt_data;
	bool might_sleep;
	bool kprobe_write_ctx;
	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
	struct bpf_arena *arena;

@@ -1628,6 +1649,7 @@ struct bpf_prog_aux {
	/* function name for valid attach_btf_id */
	const char *attach_func_name;
	struct bpf_prog **func;
	struct bpf_prog_aux *main_prog_aux;
	void *jit_data; /* JIT specific data. arch dependent */
	struct bpf_jit_poke_descriptor *poke_tab;
	struct bpf_kfunc_desc_tab *kfunc_tab;

@@ -1711,7 +1733,10 @@ struct bpf_prog {
	enum bpf_attach_type expected_attach_type; /* For some prog types */
	u32 len; /* Number of filter blocks */
	u32 jited_len; /* Size of jited insns in bytes */
	u8 tag[BPF_TAG_SIZE];
	union {
		u8 digest[SHA256_DIGEST_SIZE];
		u8 tag[BPF_TAG_SIZE];
	};
	struct bpf_prog_stats __percpu *stats;
	int __percpu *active;
	unsigned int (*bpf_func)(const void *ctx,

@@ -1985,6 +2010,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
	module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
u32 bpf_struct_ops_id(const void *kdata);

#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */

@@ -2411,6 +2437,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

@@ -2697,7 +2724,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 flags);

int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);

int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags);

@@ -2874,6 +2901,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
		     enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);

#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)

@@ -3161,6 +3189,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}

static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
						   unsigned long fault_ip)
{
}
#endif /* CONFIG_BPF_SYSCALL */

static __always_inline int

@@ -3403,6 +3436,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */

#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)

struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
struct bpf_key *bpf_lookup_system_key(u64 id);
void bpf_key_put(struct bpf_key *bkey);
int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
			       struct bpf_dynptr *sig_p,
			       struct bpf_key *trusted_keyring);

#else
static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
	return NULL;
}

static inline struct bpf_key *bpf_lookup_system_key(u64 id)
{
	return NULL;
}

static inline void bpf_key_put(struct bpf_key *bkey)
{
}

static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
					     struct bpf_dynptr *sig_p,
					     struct bpf_key *trusted_keyring)
{
	return -EOPNOTSUPP;
}
#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */

/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -26,28 +26,6 @@
/* Patch buffer size */
#define INSN_BUF_SIZE 32

/* Liveness marks, used for registers and spilled-regs (in stack slots).
 * Read marks propagate upwards until they find a write mark; they record that
 * "one of this state's descendants read this reg" (and therefore the reg is
 * relevant for states_equal() checks).
 * Write marks collect downwards and do not propagate; they record that "the
 * straight-line code that reached this state (from its parent) wrote this reg"
 * (and therefore that reads propagated from this state or its descendants
 * should not propagate to its parent).
 * A state with a write mark can receive read marks; it just won't propagate
 * them to its parent, since the write mark is a property, not of the state,
 * but of the link between it and its parent. See mark_reg_read() and
 * mark_stack_slot_read() in kernel/bpf/verifier.c.
 */
enum bpf_reg_liveness {
	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
};

#define ITER_PREFIX "bpf_iter_"

enum bpf_iter_state {

@@ -212,8 +190,6 @@ struct bpf_reg_state {
	 * allowed and has the same effect as bpf_sk_release(sk).
	 */
	u32 ref_obj_id;
	/* parentage chain for liveness checking */
	struct bpf_reg_state *parent;
	/* Inside the callee two registers can be both PTR_TO_STACK like
	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
	 * while another to the caller's stack. To differentiate them 'frameno'

@@ -226,7 +202,6 @@ struct bpf_reg_state {
	 * patching which only happens after main verification finished.
	 */
	s32 subreg_def;
	enum bpf_reg_liveness live;
	/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
	bool precise;
};

@@ -445,6 +420,7 @@ struct bpf_verifier_state {

	bool speculative;
	bool in_sleepable;
	bool cleaned;

	/* first and last insn idx of this verifier state */
	u32 first_insn_idx;

@@ -665,6 +641,7 @@ struct bpf_subprog_info {
	/* 'start' has to be the first field otherwise find_subprog() won't work */
	u32 start; /* insn idx of function entry point */
	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
	u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
	u16 stack_depth; /* max. stack depth used by this function */
	u16 stack_extra;
	/* offsets in range [stack_depth .. fastcall_stack_off)

@@ -744,6 +721,8 @@ struct bpf_scc_info {
	struct bpf_scc_visit visits[];
};

struct bpf_liveness;

/* single container for all structs
 * one verifier_env per bpf_check() call
 */

@@ -794,7 +773,10 @@ struct bpf_verifier_env {
	struct {
		int *insn_state;
		int *insn_stack;
		/* vector of instruction indexes sorted in post-order */
		/*
		 * vector of instruction indexes sorted in post-order, grouped by subprogram,
		 * see bpf_subprog_info->postorder_start.
		 */
		int *insn_postorder;
		int cur_stack;
		/* current position in the insn_postorder vector */

@@ -842,6 +824,7 @@ struct bpf_verifier_env {
	struct bpf_insn insn_buf[INSN_BUF_SIZE];
	struct bpf_insn epilogue_buf[INSN_BUF_SIZE];
	struct bpf_scc_callchain callchain_buf;
	struct bpf_liveness *liveness;
	/* array of pointers to bpf_scc_info indexed by SCC id */
	struct bpf_scc_info **scc_info;
	u32 scc_cnt;

@@ -875,13 +858,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
#define verifier_bug_if(cond, env, fmt, args...)				\
	({									\
		bool __cond = (cond);						\
		if (unlikely(__cond)) {						\
			BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \
			bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \
		}								\
		if (unlikely(__cond))						\
			verifier_bug(env, fmt " (" #cond ")", ##args);		\
		(__cond);							\
	})
#define verifier_bug(env, fmt, args...)						\
	({									\
		BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args);		\
		bpf_log(&env->log, "verifier bug: " fmt "\n", ##args);		\
	})
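The reworked macros warn once, log to the verifier log, and hand the condition back to the caller as a statement expression. A stand-alone model of that pattern, with invented names and an invented condition, just to show why the call site can branch on the result:

#include <stdbool.h>
#include <stdio.h>

/* Model of the verifier_bug()/verifier_bug_if() pattern; names are ours. */
#define bug(fmt, ...) \
	fprintf(stderr, "verifier bug: " fmt "\n", ##__VA_ARGS__)
#define bug_if(cond, fmt, ...)					\
	({							\
		bool __cond = (cond);				\
		if (__cond)					\
			bug(fmt " (" #cond ")", ##__VA_ARGS__);	\
		__cond;						\
	})

int main(void)
{
	int regno = 42;

	/* logs and returns nonzero because the condition holds */
	if (bug_if(regno > 10, "unexpected regno %d", regno))
		return 1;
	return 0;
}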

static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{

@@ -962,6 +947,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
	case BPF_PROG_TYPE_STRUCT_OPS:
		return prog->aux->jits_use_priv_stack;
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_SYSCALL:
		return false;
	default:
		return true;

@@ -1062,4 +1048,21 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
		      u32 frameno);

struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
int bpf_jmp_offset(struct bpf_insn *insn);
int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]);
void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);

int bpf_stack_liveness_init(struct bpf_verifier_env *env);
void bpf_stack_liveness_free(struct bpf_verifier_env *env);
int bpf_update_live_stack(struct bpf_verifier_env *env);
int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask);
void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask);
int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env);

#endif /* _LINUX_BPF_VERIFIER_H */
@@ -86,7 +86,7 @@
 * as to avoid issues such as the compiler inlining or eliding either a static
 * kfunc, or a global kfunc in an LTO build.
 */
#define __bpf_kfunc __used __retain noinline
#define __bpf_kfunc __used __retain __noclone noinline

#define __bpf_kfunc_start_defs()					\
	__diag_push();							\

@@ -656,6 +656,7 @@ static inline void cgroup_kthread_ready(void)
}

void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
struct cgroup *__cgroup_get_from_id(u64 id);
struct cgroup *cgroup_get_from_id(u64 id);
#else /* !CONFIG_CGROUPS */
@@ -78,6 +78,9 @@ struct ctl_table_header;
/* unused opcode to mark special atomic instruction */
#define BPF_PROBE_ATOMIC 0xe0

/* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */
#define BPF_PROBE_MEM32SX 0xc0

/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0

@@ -997,12 +1000,6 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
	return prog->len * sizeof(struct bpf_insn);
}

static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
{
	return round_up(bpf_prog_insn_size(prog) +
			sizeof(__be64) + 1, SHA1_BLOCK_SIZE);
}

static inline unsigned int bpf_prog_size(unsigned int proglen)
{
	return max(sizeof(struct bpf_prog),

@@ -1296,7 +1293,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
				u32 pass, void *image)
{
	pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen,
	pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen,
	       proglen, pass, image, current->comm, task_pid_nr(current));

	if (image)

@@ -1784,6 +1781,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
		      void *buf, unsigned long len, bool flush);
void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
				       void *to, u32 len)

@@ -1818,6 +1816,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
				    unsigned long len, bool flush)
{
}

static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
{
	return ERR_PTR(-EOPNOTSUPP);
}
#endif /* CONFIG_NET */

#endif /* __LINUX_FILTER_H__ */

@@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
	preempt_enable_notrace();
}

static __always_inline void rcu_read_lock_dont_migrate(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RCU))
		migrate_disable();
	rcu_read_lock();
}

static inline void rcu_read_unlock_migrate(void)
{
	rcu_read_unlock();
	if (IS_ENABLED(CONFIG_PREEMPT_RCU))
		migrate_enable();
}
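These helpers fold the migrate_disable()/rcu_read_lock() pairing used throughout the BPF code into one call; the call-site conversions appear later in this pull. A sketch of the intended usage pattern, with an invented data structure ('my_obj' and 'read_val' are not kernel names):

#include <linux/rcupdate.h>

struct my_obj {
	int val;
};

static int read_val(struct my_obj __rcu **slot)
{
	struct my_obj *obj;
	int val = -1;

	rcu_read_lock_dont_migrate();
	obj = rcu_dereference(*slot);
	if (obj)
		val = obj->val;
	rcu_read_unlock_migrate();

	return val;
}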

/**
 * RCU_INIT_POINTER() - initialize an RCU protected pointer
 * @p: The pointer to be initialized.

@@ -51,9 +51,15 @@ struct tnum tnum_xor(struct tnum a, struct tnum b);
/* Multiply two tnums, return @a * @b */
struct tnum tnum_mul(struct tnum a, struct tnum b);

/* Return true if the known bits of both tnums have the same value */
bool tnum_overlap(struct tnum a, struct tnum b);

/* Return a tnum representing numbers satisfying both @a and @b */
struct tnum tnum_intersect(struct tnum a, struct tnum b);

/* Returns a tnum representing numbers satisfying either @a or @b */
struct tnum tnum_union(struct tnum t1, struct tnum t2);

/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
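A tnum tracks a (value, mask) pair in which the mask bits are unknown. The following stand-alone model of the overlap/intersect/union operations declared above is written from their doc comments, so treat the exact formulas as a sketch rather than the kernel's implementation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* (value, mask): bits set in mask are unknown; value keeps them cleared. */
struct tnum { uint64_t value, mask; };

static struct tnum tnum(uint64_t value, uint64_t mask)
{
	return (struct tnum){ value & ~mask, mask };
}

/* Numbers satisfying both @a and @b: known bits of both are combined. */
static struct tnum tnum_intersect(struct tnum a, struct tnum b)
{
	return tnum(a.value | b.value, a.mask & b.mask);
}

/* Numbers satisfying either @a or @b: disagreeing bits become unknown. */
static struct tnum tnum_union(struct tnum a, struct tnum b)
{
	return tnum(a.value, a.mask | b.mask | (a.value ^ b.value));
}

/* True if the bits known in both tnums have the same value. */
static bool tnum_overlap(struct tnum a, struct tnum b)
{
	uint64_t mu = a.mask | b.mask;

	return (a.value & ~mu) == (b.value & ~mu);
}

int main(void)
{
	struct tnum a = tnum(0x10, 0x0f);	/* 0x10..0x1f */
	struct tnum b = tnum(0x18, 0x03);	/* 0x18..0x1b */
	struct tnum u = tnum_union(a, b);

	printf("union: value=%#llx mask=%#llx overlap=%d\n",
	       (unsigned long long)u.value, (unsigned long long)u.mask,
	       tnum_overlap(a, b));
	return 0;
}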
@@ -36,6 +36,7 @@ enum key_being_used_for {
	VERIFYING_KEY_SIGNATURE,
	VERIFYING_KEY_SELF_SIGNATURE,
	VERIFYING_UNSPECIFIED_SIGNATURE,
	VERIFYING_BPF_SIGNATURE,
	NR__KEY_BEING_USED_FOR
};
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION

@@ -115,6 +115,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
	xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}

static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
{
	xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
}

static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{

@@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
	return ret;
}

static inline void xsk_buff_del_tail(struct xdp_buff *tail)
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);

	list_del(&xskb->list_node);
}

static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
	struct xdp_buff_xsk *frag;

	frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
				list_node);
	return &frag->xdp;
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{
	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);

@@ -389,10 +399,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
	return NULL;
}

static inline void xsk_buff_del_tail(struct xdp_buff *tail)
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
}

static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
	return NULL;
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{
	return NULL;

@@ -1522,6 +1522,12 @@ union bpf_attr {
	 * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
	 */
	__s32	map_token_fd;

	/* Hash of the program that has exclusive access to the map.
	 */
	__aligned_u64 excl_prog_hash;
	/* Size of the passed excl_prog_hash. */
	__u32 excl_prog_hash_size;
};

struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */

@@ -1605,6 +1611,16 @@ union bpf_attr {
	 * continuous.
	 */
	__u32		fd_array_cnt;
	/* Pointer to a buffer containing the signature of the BPF
	 * program.
	 */
	__aligned_u64	signature;
	/* Size of the signature buffer in bytes. */
	__u32		signature_size;
	/* ID of the kernel keyring to be used for signature
	 * verification.
	 */
	__s32		keyring_id;
};
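These new BPF_PROG_LOAD attributes carry the program signature for the signed-programs feature described in the merge text. A rough user-space sketch of filling them via the raw bpf(2) syscall; the signature bytes and keyring id are placeholders, the header layout assumes the uapi from this kernel, and a real loader would go through libbpf:

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

/* sig/sig_len would come from a detached PKCS#7 signature produced at
 * build time; keyring_id names a keyring that holds the signer's cert. */
static int load_signed_prog(union bpf_attr *attr,
			    const void *sig, __u32 sig_len, __s32 keyring_id)
{
	attr->signature = (__u64)(unsigned long)sig;
	attr->signature_size = sig_len;
	attr->keyring_id = keyring_id;

	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(*attr));
}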

struct { /* anonymous struct used by BPF_OBJ_* commands */

@@ -6666,6 +6682,8 @@ struct bpf_map_info {
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_id;
	__u64 map_extra;
	__aligned_u64 hash;
	__u32 hash_size;
} __attribute__((aligned(8)));

struct bpf_btf_info {

@@ -7418,6 +7436,10 @@ struct bpf_timer {
	__u64 __opaque[2];
} __attribute__((aligned(8)));

struct bpf_task_work {
	__u64 __opaque;
} __attribute__((aligned(8)));

struct bpf_wq {
	__u64 __opaque[2];
} __attribute__((aligned(8)));
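struct bpf_task_work is the opaque map-value anchor for the bpf_task_work_schedule*() kfuncs mentioned in the merge description, used the same way struct bpf_timer and struct bpf_wq are. A minimal sketch of the embedding pattern only; the scheduling kfuncs are declared elsewhere and their exact signatures are not shown in this hunk:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct elem {
	int pid;
	struct bpf_task_work tw;	/* opaque, managed by the kernel */
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, struct elem);
} tw_map SEC(".maps");

char LICENSE[] SEC("license") = "GPL";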
@@ -3,7 +3,7 @@
# BPF interpreter that, for example, classic socket filters depend on.
config BPF
	bool
	select CRYPTO_LIB_SHA1
	select CRYPTO_LIB_SHA256

# Used by archs to tell that they support BPF JIT compiler plus which
# flavour. Only one of the two can be selected for a specific arch since

@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)

obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o

@@ -633,3 +633,33 @@ static int __init kfunc_init(void)
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);

void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip)
{
	struct bpf_stream_stage ss;
	struct bpf_prog *prog;
	u64 user_vm_start;

	/*
	 * The RCU read lock is held to safely traverse the latch tree, but we
	 * don't need its protection when accessing the prog, since it will not
	 * disappear while we are handling the fault.
	 */
	rcu_read_lock();
	prog = bpf_prog_ksym_find(fault_ip);
	rcu_read_unlock();
	if (!prog)
		return;

	/* Use main prog for stream access */
	prog = prog->aux->main_prog_aux->prog;

	user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	addr += clear_lo32(user_vm_start);

	bpf_stream_stage(ss, prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
				  write ? "WRITE" : "READ", addr);
		bpf_stream_dump_stack(ss);
	}));
}

@@ -12,6 +12,7 @@
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include <crypto/sha2.h>

#include "map_in_map.h"

@@ -174,6 +175,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
			      void *hash_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	sha256(array->value, (u64)array->elem_size * array->map.max_entries,
	       hash_buf);
	memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
	return 0;
}
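Together with the hash/hash_size fields added to struct bpf_map_info earlier in this pull, this callback lets user space ask the kernel for a SHA-256 of an array map's contents. A hedged sketch of the query side via BPF_OBJ_GET_INFO_BY_FD; it assumes the uapi headers from this kernel, and whether the map must be frozen first is not shown in this hunk:

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int map_hash(int map_fd, unsigned char out[32])
{
	struct bpf_map_info info = {};
	union bpf_attr attr = {};

	info.hash = (__u64)(unsigned long)out;
	info.hash_size = 32;	/* SHA256_DIGEST_SIZE */

	attr.info.bpf_fd = map_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;

	return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
}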
|
||||
|
||||
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
|
||||
u32 off)
|
||||
{
|
||||
|
|
@ -431,7 +443,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
|
|||
return (void *)round_down((unsigned long)array, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static void array_map_free_timers_wq(struct bpf_map *map)
|
||||
static void array_map_free_internal_structs(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
int i;
|
||||
|
|
@ -439,12 +451,14 @@ static void array_map_free_timers_wq(struct bpf_map *map)
|
|||
/* We don't reset or free fields other than timer and workqueue
|
||||
* on uref dropping to zero.
|
||||
*/
|
||||
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
|
||||
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
|
||||
for (i = 0; i < array->map.max_entries; i++) {
|
||||
if (btf_record_has_field(map->record, BPF_TIMER))
|
||||
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
|
||||
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
|
||||
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
|
||||
if (btf_record_has_field(map->record, BPF_TASK_WORK))
|
||||
bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -783,7 +797,7 @@ const struct bpf_map_ops array_map_ops = {
|
|||
.map_alloc = array_map_alloc,
|
||||
.map_free = array_map_free,
|
||||
.map_get_next_key = array_map_get_next_key,
|
||||
.map_release_uref = array_map_free_timers_wq,
|
||||
.map_release_uref = array_map_free_internal_structs,
|
||||
.map_lookup_elem = array_map_lookup_elem,
|
||||
.map_update_elem = array_map_update_elem,
|
||||
.map_delete_elem = array_map_delete_elem,
|
||||
|
|
@ -800,6 +814,7 @@ const struct bpf_map_ops array_map_ops = {
|
|||
.map_mem_usage = array_map_mem_usage,
|
||||
.map_btf_id = &array_map_btf_ids[0],
|
||||
.iter_seq_info = &iter_seq_info,
|
||||
.map_get_hash = &array_map_get_hash,
|
||||
};
|
||||
|
||||
const struct bpf_map_ops percpu_array_map_ops = {
|
||||
|
|
|
|||
|
|
@ -45,8 +45,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
|
|||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
rcu_read_lock_dont_migrate();
|
||||
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
|
||||
if (!local_storage)
|
||||
goto out;
|
||||
|
|
@ -55,8 +54,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
|
|||
bpf_local_storage_destroy(local_storage);
|
||||
bpf_cgrp_storage_unlock();
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
rcu_read_unlock_migrate();
|
||||
}
|
||||
|
||||
static struct bpf_local_storage_data *
|
||||
|
|
|
|||
|
|
@@ -62,8 +62,7 @@ void bpf_inode_storage_free(struct inode *inode)
 	if (!bsb)
 		return;
 
-	migrate_disable();
-	rcu_read_lock();
+	rcu_read_lock_dont_migrate();
 
 	local_storage = rcu_dereference(bsb->storage);
 	if (!local_storage)
@@ -71,8 +70,7 @@ void bpf_inode_storage_free(struct inode *inode)
 
 	bpf_local_storage_destroy(local_storage);
 out:
-	rcu_read_unlock();
-	migrate_enable();
+	rcu_read_unlock_migrate();
 }
 
 static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -705,13 +705,11 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 		migrate_enable();
 		rcu_read_unlock_trace();
 	} else {
-		rcu_read_lock();
-		migrate_disable();
+		rcu_read_lock_dont_migrate();
 		old_run_ctx = bpf_set_run_ctx(&run_ctx);
 		ret = bpf_prog_run(prog, ctx);
 		bpf_reset_run_ctx(old_run_ctx);
-		migrate_enable();
-		rcu_read_unlock();
+		rcu_read_unlock_migrate();
 	}
 
 	/* bpf program can only return 0 or 1:
@@ -19,14 +19,6 @@
 #define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
 #define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
 
-static int get_next_cpu(int cpu)
-{
-	cpu = cpumask_next(cpu, cpu_possible_mask);
-	if (cpu >= nr_cpu_ids)
-		cpu = cpumask_first(cpu_possible_mask);
-	return cpu;
-}
-
 /* Local list helpers */
 static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
 {
@@ -482,7 +474,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
 
 		raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
 
-		steal = get_next_cpu(steal);
+		steal = cpumask_next_wrap(steal, cpu_possible_mask);
 	} while (!node && steal != first_steal);
 
 	loc_l->next_steal = steal;
@@ -1174,6 +1174,18 @@ void bpf_struct_ops_put(const void *kdata)
 	bpf_map_put(&st_map->map);
 }
 
+u32 bpf_struct_ops_id(const void *kdata)
+{
+	struct bpf_struct_ops_value *kvalue;
+	struct bpf_struct_ops_map *st_map;
+
+	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+	st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
+
+	return st_map->map.id;
+}
+EXPORT_SYMBOL_GPL(bpf_struct_ops_id);
+
 static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
 {
 	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
@@ -70,8 +70,7 @@ void bpf_task_storage_free(struct task_struct *task)
 {
 	struct bpf_local_storage *local_storage;
 
-	migrate_disable();
-	rcu_read_lock();
+	rcu_read_lock_dont_migrate();
 
 	local_storage = rcu_dereference(task->bpf_storage);
 	if (!local_storage)
@@ -81,8 +80,7 @@ void bpf_task_storage_free(struct task_struct *task)
 	bpf_local_storage_destroy(local_storage);
 	bpf_task_storage_unlock();
 out:
-	rcu_read_unlock();
-	migrate_enable();
+	rcu_read_unlock_migrate();
 }
 
 static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -3478,60 +3478,45 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
 	return BTF_FIELD_FOUND;
 }
 
-#define field_mask_test_name(field_type, field_type_str) \
-	if (field_mask & field_type && !strcmp(name, field_type_str)) { \
-		type = field_type; \
-		goto end; \
-	}
-
 static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
-			      u32 field_mask, u32 *seen_mask,
-			      int *align, int *sz)
+			      u32 field_mask, u32 *seen_mask, int *align, int *sz)
 {
-	int type = 0;
+	const struct {
+		enum btf_field_type type;
+		const char *const name;
+		const bool is_unique;
+	} field_types[] = {
+		{ BPF_SPIN_LOCK, "bpf_spin_lock", true },
+		{ BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true },
+		{ BPF_TIMER, "bpf_timer", true },
+		{ BPF_WORKQUEUE, "bpf_wq", true },
+		{ BPF_TASK_WORK, "bpf_task_work", true },
+		{ BPF_LIST_HEAD, "bpf_list_head", false },
+		{ BPF_LIST_NODE, "bpf_list_node", false },
+		{ BPF_RB_ROOT, "bpf_rb_root", false },
+		{ BPF_RB_NODE, "bpf_rb_node", false },
+		{ BPF_REFCOUNT, "bpf_refcount", false },
+	};
+	int type = 0, i;
 	const char *name = __btf_name_by_offset(btf, var_type->name_off);
+	const char *field_type_name;
+	enum btf_field_type field_type;
+	bool is_unique;
 
-	if (field_mask & BPF_SPIN_LOCK) {
-		if (!strcmp(name, "bpf_spin_lock")) {
-			if (*seen_mask & BPF_SPIN_LOCK)
-				return -E2BIG;
-			*seen_mask |= BPF_SPIN_LOCK;
-			type = BPF_SPIN_LOCK;
-			goto end;
-		}
-	}
-	if (field_mask & BPF_RES_SPIN_LOCK) {
-		if (!strcmp(name, "bpf_res_spin_lock")) {
-			if (*seen_mask & BPF_RES_SPIN_LOCK)
-				return -E2BIG;
-			*seen_mask |= BPF_RES_SPIN_LOCK;
-			type = BPF_RES_SPIN_LOCK;
-			goto end;
-		}
-	}
-	if (field_mask & BPF_TIMER) {
-		if (!strcmp(name, "bpf_timer")) {
-			if (*seen_mask & BPF_TIMER)
-				return -E2BIG;
-			*seen_mask |= BPF_TIMER;
-			type = BPF_TIMER;
-			goto end;
-		}
-	}
-	if (field_mask & BPF_WORKQUEUE) {
-		if (!strcmp(name, "bpf_wq")) {
-			if (*seen_mask & BPF_WORKQUEUE)
-				return -E2BIG;
-			*seen_mask |= BPF_WORKQUEUE;
-			type = BPF_WORKQUEUE;
-			goto end;
-		}
-	}
-	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
-	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
-	field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
-	field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
-	field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
+	for (i = 0; i < ARRAY_SIZE(field_types); ++i) {
+		field_type = field_types[i].type;
+		field_type_name = field_types[i].name;
+		is_unique = field_types[i].is_unique;
+		if (!(field_mask & field_type) || strcmp(name, field_type_name))
+			continue;
+		if (is_unique) {
+			if (*seen_mask & field_type)
+				return -E2BIG;
+			*seen_mask |= field_type;
+		}
+		type = field_type;
+		goto end;
+	}
 
 	/* Only return BPF_KPTR when all other types with matchable names fail */
 	if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) {
@@ -3545,8 +3530,6 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
 	return type;
 }
 
-#undef field_mask_test_name
-
 /* Repeat a number of fields for a specified number of times.
  *
  * Copy the fields starting from the first field and repeat them for
@@ -3693,6 +3676,7 @@ static int btf_find_field_one(const struct btf *btf,
 	case BPF_LIST_NODE:
 	case BPF_RB_NODE:
 	case BPF_REFCOUNT:
+	case BPF_TASK_WORK:
 		ret = btf_find_struct(btf, var_type, off, sz, field_type,
 				      info_cnt ? &info[0] : &tmp);
 		if (ret < 0)
@@ -3985,6 +3969,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 	rec->timer_off = -EINVAL;
 	rec->wq_off = -EINVAL;
 	rec->refcount_off = -EINVAL;
+	rec->task_work_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
 		if (info_arr[i].off + field_type_size > value_size) {
@@ -4024,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->wq_off = rec->fields[i].offset;
 			break;
+		case BPF_TASK_WORK:
+			WARN_ON_ONCE(rec->task_work_off >= 0);
+			rec->task_work_off = rec->fields[i].offset;
+			break;
 		case BPF_REFCOUNT:
 			WARN_ON_ONCE(rec->refcount_off >= 0);
 			/* Cache offset for faster lookup at runtime */
@@ -6762,7 +6751,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	/* skip modifiers */
 	while (btf_type_is_modifier(t))
 		t = btf_type_by_id(btf, t->type);
-	if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+	if (btf_type_is_small_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
 		/* accessing a scalar */
 		return true;
 	if (!btf_type_is_ptr(t)) {
@@ -7334,7 +7323,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
 	if (btf_type_is_ptr(t))
 		/* kernel size of pointer. Not BPF's size of pointer*/
 		return sizeof(void *);
-	if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+	if (btf_type_is_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
 		return t->size;
 	return -EINVAL;
 }
@@ -7343,7 +7332,7 @@ static u8 __get_type_fmodel_flags(const struct btf_type *t)
 {
 	u8 flags = 0;
 
-	if (__btf_type_is_struct(t))
+	if (btf_type_is_struct(t))
 		flags |= BTF_FMODEL_STRUCT_ARG;
 	if (btf_type_is_signed_int(t))
 		flags |= BTF_FMODEL_SIGNED_ARG;
@@ -7384,7 +7373,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 		return -EINVAL;
 	}
 	ret = __get_type_size(btf, func->type, &t);
-	if (ret < 0 || __btf_type_is_struct(t)) {
+	if (ret < 0 || btf_type_is_struct(t)) {
 		bpf_log(log,
 			"The function %s return type %s is unsupported.\n",
 			tname, btf_type_str(t));
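For illustration, a hedged BPF-side example of a map value whose embedded
fields the table above matches by name (the struct and field names here are
invented; declaring a second "unique" field, such as another bpf_timer, in
the same value would trip the seen_mask check and return -E2BIG):

	struct elem {
		struct bpf_spin_lock lock;	/* unique: at most one per map value */
		struct bpf_timer timer;		/* unique */
		struct bpf_task_work tw;	/* unique, new in this series */
	};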
@@ -27,14 +27,15 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 /*
  * cgroup bpf destruction makes heavy use of work items and there can be a lot
  * of concurrent destructions. Use a separate workqueue so that cgroup bpf
- * destruction work items don't end up filling up max_active of system_wq
+ * destruction work items don't end up filling up max_active of system_percpu_wq
  * which may lead to deadlock.
  */
 static struct workqueue_struct *cgroup_bpf_destroy_wq;
 
 static int __init cgroup_bpf_wq_init(void)
 {
-	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
+	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy",
+						WQ_PERCPU, 1);
 	if (!cgroup_bpf_destroy_wq)
 		panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
 	return 0;
@@ -71,8 +72,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 	u32 func_ret;
 
 	run_ctx.retval = retval;
-	migrate_disable();
-	rcu_read_lock();
+	rcu_read_lock_dont_migrate();
 	array = rcu_dereference(cgrp->effective[atype]);
 	item = &array->items[0];
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
@@ -88,8 +88,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
-	migrate_enable();
+	rcu_read_unlock_migrate();
 	return run_ctx.retval;
 }
@@ -18,6 +18,7 @@
  */
 
 #include <uapi/linux/btf.h>
 #include <crypto/sha1.h>
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
@@ -38,6 +39,7 @@
 #include <linux/bpf_mem_alloc.h>
 #include <linux/memcontrol.h>
 #include <linux/execmem.h>
+#include <crypto/sha2.h>
 
 #include <asm/barrier.h>
 #include <linux/unaligned.h>
@@ -119,6 +121,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 
 	fp->pages = size / PAGE_SIZE;
 	fp->aux = aux;
+	fp->aux->main_prog_aux = aux;
 	fp->aux->prog = fp;
 	fp->jit_requested = ebpf_jit_enabled();
 	fp->blinding_requested = bpf_jit_blinding_enabled(fp);
@@ -293,28 +296,18 @@ void __bpf_prog_free(struct bpf_prog *fp)
 
 int bpf_prog_calc_tag(struct bpf_prog *fp)
 {
-	const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
-	u32 raw_size = bpf_prog_tag_scratch_size(fp);
-	u32 digest[SHA1_DIGEST_WORDS];
-	u32 ws[SHA1_WORKSPACE_WORDS];
-	u32 i, bsize, psize, blocks;
+	size_t size = bpf_prog_insn_size(fp);
 	struct bpf_insn *dst;
 	bool was_ld_map;
-	u8 *raw, *todo;
-	__be32 *result;
-	__be64 *bits;
+	u32 i;
 
-	raw = vmalloc(raw_size);
-	if (!raw)
+	dst = vmalloc(size);
+	if (!dst)
 		return -ENOMEM;
 
-	sha1_init_raw(digest);
-	memset(ws, 0, sizeof(ws));
-
 	/* We need to take out the map fd for the digest calculation
 	 * since they are unstable from user space side.
 	 */
-	dst = (void *)raw;
 	for (i = 0, was_ld_map = false; i < fp->len; i++) {
 		dst[i] = fp->insnsi[i];
 		if (!was_ld_map &&
@@ -334,33 +327,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
 			was_ld_map = false;
 		}
 	}
-
-	psize = bpf_prog_insn_size(fp);
-	memset(&raw[psize], 0, raw_size - psize);
-	raw[psize++] = 0x80;
-
-	bsize = round_up(psize, SHA1_BLOCK_SIZE);
-	blocks = bsize / SHA1_BLOCK_SIZE;
-	todo = raw;
-	if (bsize - psize >= sizeof(__be64)) {
-		bits = (__be64 *)(todo + bsize - sizeof(__be64));
-	} else {
-		bits = (__be64 *)(todo + bsize + bits_offset);
-		blocks++;
-	}
-	*bits = cpu_to_be64((psize - 1) << 3);
-
-	while (blocks--) {
-		sha1_transform(digest, todo, ws);
-		todo += SHA1_BLOCK_SIZE;
-	}
-
-	result = (__force __be32 *)digest;
-	for (i = 0; i < SHA1_DIGEST_WORDS; i++)
-		result[i] = cpu_to_be32(digest[i]);
-	memcpy(fp->tag, result, sizeof(fp->tag));
-
-	vfree(raw);
+	sha256((u8 *)dst, size, fp->digest);
+	vfree(dst);
 	return 0;
 }
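For reference, a hedged userspace sketch of the digest the rewritten
bpf_prog_calc_tag() computes: SHA-256 over the program's instruction image
(the kernel first zeroes map-fd immediates in that image; that sanitizing
loop is elided here). OpenSSL's SHA256() stands in for the kernel's sha256():

	#include <openssl/sha.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Illustrative only: digest an already-sanitized instruction image. */
	static void prog_digest(const void *insns, size_t insn_bytes,
				uint8_t out[SHA256_DIGEST_LENGTH])
	{
		SHA256(insns, insn_bytes, out);
	}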
@@ -2393,6 +2361,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
 		map->owner->type = prog_type;
 		map->owner->jited = fp->jited;
 		map->owner->xdp_has_frags = aux->xdp_has_frags;
+		map->owner->expected_attach_type = fp->expected_attach_type;
 		map->owner->attach_func_proto = aux->attach_func_proto;
 		for_each_cgroup_storage_type(i) {
 			map->owner->storage_cookie[i] =
@@ -2404,6 +2373,10 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
 		ret = map->owner->type == prog_type &&
 		      map->owner->jited == fp->jited &&
 		      map->owner->xdp_has_frags == aux->xdp_has_frags;
+		if (ret &&
+		    map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
+		    map->owner->expected_attach_type != fp->expected_attach_type)
+			ret = false;
 		for_each_cgroup_storage_type(i) {
 			if (!ret)
 				break;
@@ -3329,9 +3302,8 @@ static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
 	rcu_read_unlock();
 	if (!prog)
 		return true;
-	if (bpf_is_subprog(prog))
-		return true;
-	ctxp->prog = prog;
+	/* Make sure we return the main prog if we found a subprog */
+	ctxp->prog = prog->aux->main_prog_aux->prog;
 	return false;
 }
@@ -550,7 +550,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
 	old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
 	if (old_rcpu) {
 		INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free);
-		queue_rcu_work(system_wq, &old_rcpu->free_work);
+		queue_rcu_work(system_percpu_wq, &old_rcpu->free_work);
 	}
 }
@@ -865,7 +865,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 	struct bpf_dtab_netdev *dev;
 
 	dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
-				   GFP_NOWAIT | __GFP_NOWARN,
+				   GFP_NOWAIT,
 				   dtab->map.numa_node);
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
@@ -215,7 +215,20 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
 	return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
 }
 
-static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *elem)
+{
+	if (btf_record_has_field(htab->map.record, BPF_TIMER))
+		bpf_obj_free_timer(htab->map.record,
+				   htab_elem_value(elem, htab->map.key_size));
+	if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
+		bpf_obj_free_workqueue(htab->map.record,
+				       htab_elem_value(elem, htab->map.key_size));
+	if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+		bpf_obj_free_task_work(htab->map.record,
+				       htab_elem_value(elem, htab->map.key_size));
+}
+
+static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
 {
 	u32 num_entries = htab->map.max_entries;
 	int i;
@@ -227,12 +240,7 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
 		struct htab_elem *elem;
 
 		elem = get_htab_elem(htab, i);
-		if (btf_record_has_field(htab->map.record, BPF_TIMER))
-			bpf_obj_free_timer(htab->map.record,
-					   htab_elem_value(elem, htab->map.key_size));
-		if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
-			bpf_obj_free_workqueue(htab->map.record,
-					       htab_elem_value(elem, htab->map.key_size));
+		htab_free_internal_structs(htab, elem);
 		cond_resched();
 	}
 }
@@ -1490,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 	}
 }
 
-static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
 {
 	int i;
 
@@ -1502,28 +1510,23 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry(l, n, head, hash_node) {
 			/* We only free timer on uref dropping to zero */
-			if (btf_record_has_field(htab->map.record, BPF_TIMER))
-				bpf_obj_free_timer(htab->map.record,
-						   htab_elem_value(l, htab->map.key_size));
-			if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
-				bpf_obj_free_workqueue(htab->map.record,
-						       htab_elem_value(l, htab->map.key_size));
+			htab_free_internal_structs(htab, l);
 		}
 		cond_resched_rcu();
 	}
 	rcu_read_unlock();
 }
 
-static void htab_map_free_timers_and_wq(struct bpf_map *map)
+static void htab_map_free_internal_structs(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 
 	/* We only free timer and workqueue on uref dropping to zero */
-	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
 		if (!htab_is_prealloc(htab))
-			htab_free_malloced_timers_and_wq(htab);
+			htab_free_malloced_internal_structs(htab);
 		else
-			htab_free_prealloced_timers_and_wq(htab);
+			htab_free_prealloced_internal_structs(htab);
 	}
 }
 
@@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers_and_wq,
+	.map_release_uref = htab_map_free_internal_structs,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
@@ -25,6 +25,9 @@
 #include <linux/kasan.h>
 #include <linux/bpf_verifier.h>
 #include <linux/uaccess.h>
+#include <linux/verification.h>
+#include <linux/task_work.h>
+#include <linux/irq_work.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -774,11 +777,9 @@ int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
 {
 	int nest_level;
 
-	preempt_disable();
 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
 	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
 		this_cpu_dec(bpf_bprintf_nest_level);
-		preempt_enable();
 		return -EBUSY;
 	}
 	*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
@@ -791,7 +792,6 @@ void bpf_put_buffers(void)
 	if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
 		return;
 	this_cpu_dec(bpf_bprintf_nest_level);
-	preempt_enable();
 }
 
 void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
@@ -1084,6 +1084,17 @@ const struct bpf_func_proto bpf_snprintf_proto = {
 	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
 };
 
+static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx)
+{
+	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
+		struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+		*arr_idx = ((char *)value - array->value) / array->elem_size;
+		return arr_idx;
+	}
+	return (void *)value - round_up(map->key_size, 8);
+}
+
 struct bpf_async_cb {
 	struct bpf_map *map;
 	struct bpf_prog *prog;
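A note on the element layouts map_key_from_value() relies on: array map
values live in one contiguous buffer, so the key is just the element index;
for hash and LRU maps the key bytes are stored directly in front of the
value, padded to 8 bytes. A caller recovers the key like this:

	u32 idx;
	void *key = map_key_from_value(map, value, &idx);
	/* array map: key == &idx, the element index;
	 * hash/LRU:  key points at the key stored just before the value,
	 *            at value - round_up(key_size, 8).
	 */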
@@ -1166,15 +1177,8 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
 	 * bpf_map_delete_elem() on the same timer.
 	 */
 	this_cpu_write(hrtimer_running, t);
-	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
-		struct bpf_array *array = container_of(map, struct bpf_array, map);
-
-		/* compute the key */
-		idx = ((char *)value - array->value) / array->elem_size;
-		key = &idx;
-	} else { /* hash or lru */
-		key = value - round_up(map->key_size, 8);
-	}
+	key = map_key_from_value(map, value, &idx);
 
 	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
 	/* The verifier checked that return value is zero. */
@@ -1200,15 +1204,7 @@ static void bpf_wq_work(struct work_struct *work)
 	if (!callback_fn)
 		return;
 
-	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
-		struct bpf_array *array = container_of(map, struct bpf_array, map);
-
-		/* compute the key */
-		idx = ((char *)value - array->value) / array->elem_size;
-		key = &idx;
-	} else { /* hash or lru */
-		key = value - round_up(map->key_size, 8);
-	}
+	key = map_key_from_value(map, value, &idx);
 
 	rcu_read_lock_trace();
 	migrate_disable();
@@ -1600,7 +1596,7 @@ void bpf_timer_cancel_and_free(void *val)
 	 * timer callback.
 	 */
 	if (this_cpu_read(hrtimer_running)) {
-		queue_work(system_unbound_wq, &t->cb.delete_work);
+		queue_work(system_dfl_wq, &t->cb.delete_work);
 		return;
 	}
 
@@ -1613,7 +1609,7 @@ void bpf_timer_cancel_and_free(void *val)
 		if (hrtimer_try_to_cancel(&t->timer) >= 0)
 			kfree_rcu(t, cb.rcu);
 		else
-			queue_work(system_unbound_wq, &t->cb.delete_work);
+			queue_work(system_dfl_wq, &t->cb.delete_work);
 	} else {
 		bpf_timer_delete_work(&t->cb.delete_work);
 	}
@@ -1783,6 +1779,9 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
 		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
 	case BPF_DYNPTR_TYPE_XDP:
 		return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
+	case BPF_DYNPTR_TYPE_SKB_META:
+		memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
+		return 0;
 	default:
 		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
 		return -EFAULT;
@@ -1839,6 +1838,11 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
 		if (flags)
 			return -EINVAL;
 		return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
+	case BPF_DYNPTR_TYPE_SKB_META:
+		if (flags)
+			return -EINVAL;
+		memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
+		return 0;
 	default:
 		WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
 		return -EFAULT;
@@ -1885,6 +1889,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
 		return (unsigned long)(ptr->data + ptr->offset + offset);
 	case BPF_DYNPTR_TYPE_SKB:
 	case BPF_DYNPTR_TYPE_XDP:
+	case BPF_DYNPTR_TYPE_SKB_META:
 		/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
 		return 0;
 	default:
@@ -2540,7 +2545,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
 {
 	struct cgroup *cgrp;
 
-	cgrp = cgroup_get_from_id(cgid);
+	cgrp = __cgroup_get_from_id(cgid);
 	if (IS_ERR(cgrp))
 		return NULL;
 	return cgrp;
@@ -2713,6 +2718,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
 		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
 		return buffer__opt;
 	}
+	case BPF_DYNPTR_TYPE_SKB_META:
+		return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
 	default:
 		WARN_ONCE(true, "unknown dynptr type %d\n", type);
 		return NULL;
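A hedged BPF-side sketch of the new skb-metadata dynptr in use, assuming the
bpf_dynptr_from_skb_meta kfunc added by this series (the exact declaration
may differ; error handling trimmed):

	extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
					    struct bpf_dynptr *ptr__uninit) __ksym;

	SEC("tc")
	int read_meta(struct __sk_buff *skb)
	{
		struct bpf_dynptr meta;
		__u32 tag = 0;

		if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
			return 0;
		/* copy the first 4 bytes of metadata out of the dynptr */
		bpf_dynptr_read(&tag, sizeof(tag), &meta, 0, 0);
		return 0;
	}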
@@ -3344,6 +3351,36 @@ __bpf_kfunc void __bpf_trap(void)
  * __get_kernel_nofault instead of plain dereference to make them safe.
  */
 
+static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
+{
+	char c1, c2;
+	int i;
+
+	if (!copy_from_kernel_nofault_allowed(s1, 1) ||
+	    !copy_from_kernel_nofault_allowed(s2, 1)) {
+		return -ERANGE;
+	}
+
+	guard(pagefault)();
+	for (i = 0; i < XATTR_SIZE_MAX; i++) {
+		__get_kernel_nofault(&c1, s1, char, err_out);
+		__get_kernel_nofault(&c2, s2, char, err_out);
+		if (ignore_case) {
+			c1 = tolower(c1);
+			c2 = tolower(c2);
+		}
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (c1 == '\0')
+			return 0;
+		s1++;
+		s2++;
+	}
+	return -E2BIG;
+err_out:
+	return -EFAULT;
+}
+
 /**
  * bpf_strcmp - Compare two strings
  * @s1__ign: One string
@@ -3359,28 +3396,25 @@ __bpf_kfunc void __bpf_trap(void)
  */
 __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
 {
-	char c1, c2;
-	int i;
-
-	if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
-	    !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
-		return -ERANGE;
-	}
-
-	guard(pagefault)();
-	for (i = 0; i < XATTR_SIZE_MAX; i++) {
-		__get_kernel_nofault(&c1, s1__ign, char, err_out);
-		__get_kernel_nofault(&c2, s2__ign, char, err_out);
-		if (c1 != c2)
-			return c1 < c2 ? -1 : 1;
-		if (c1 == '\0')
-			return 0;
-		s1__ign++;
-		s2__ign++;
-	}
-	return -E2BIG;
-err_out:
-	return -EFAULT;
+	return __bpf_strcasecmp(s1__ign, s2__ign, false);
 }
+
+/**
+ * bpf_strcasecmp - Compare two strings, ignoring the case of the characters
+ * @s1__ign: One string
+ * @s2__ign: Another string
+ *
+ * Return:
+ * * %0       - Strings are equal
+ * * %-1      - @s1__ign is smaller
+ * * %1       - @s2__ign is smaller
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG  - One of strings is too large
+ * * %-ERANGE - One of strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
+{
+	return __bpf_strcasecmp(s1__ign, s2__ign, true);
+}
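A hedged usage sketch from the BPF side (kfunc declarations via __ksym; the
program type is arbitrary):

	extern int bpf_strcmp(const char *s1__ign, const char *s2__ign) __ksym;
	extern int bpf_strcasecmp(const char *s1__ign, const char *s2__ign) __ksym;

	char a[16] = "Midnight";
	char b[16] = "MIDNIGHT";

	SEC("syscall")
	int cmp_example(void *ctx)
	{
		return bpf_strcasecmp(a, b); /* 0: equal ignoring case */
	}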
 /**
@@ -3712,9 +3746,490 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
 {
 	return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
 }
+#ifdef CONFIG_KEYS
+/**
+ * bpf_lookup_user_key - lookup a key by its serial
+ * @serial: key handle serial number
+ * @flags: lookup-specific flags
+ *
+ * Search a key with a given *serial* and the provided *flags*.
+ * If found, increment the reference count of the key by one, and
+ * return it in the bpf_key structure.
+ *
+ * The bpf_key structure must be passed to bpf_key_put() when done
+ * with it, so that the key reference count is decremented and the
+ * bpf_key structure is freed.
+ *
+ * Permission checks are deferred to the time the key is used by
+ * one of the available key-specific kfuncs.
+ *
+ * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
+ * special keyring (e.g. session keyring), if it doesn't yet exist.
+ * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
+ * for the key construction, and to retrieve uninstantiated keys (keys
+ * without data attached to them).
+ *
+ * Return: a bpf_key pointer with a valid key pointer if the key is found, a
+ *         NULL pointer otherwise.
+ */
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
+{
+	key_ref_t key_ref;
+	struct bpf_key *bkey;
+
+	if (flags & ~KEY_LOOKUP_ALL)
+		return NULL;
+
+	/*
+	 * Permission check is deferred until the key is used, as the
+	 * intent of the caller is unknown here.
+	 */
+	key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
+	if (IS_ERR(key_ref))
+		return NULL;
+
+	bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
+	if (!bkey) {
+		key_put(key_ref_to_ptr(key_ref));
+		return NULL;
+	}
+
+	bkey->key = key_ref_to_ptr(key_ref);
+	bkey->has_ref = true;
+
+	return bkey;
+}
+
+/**
+ * bpf_lookup_system_key - lookup a key by a system-defined ID
+ * @id: key ID
+ *
+ * Obtain a bpf_key structure with a key pointer set to the passed key ID.
+ * The key pointer is marked as invalid, to prevent bpf_key_put() from
+ * attempting to decrement the key reference count on that pointer. The key
+ * pointer set in such way is currently understood only by
+ * verify_pkcs7_signature().
+ *
+ * Set *id* to one of the values defined in include/linux/verification.h:
+ * 0 for the primary keyring (immutable keyring of system keys);
+ * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
+ * (where keys can be added only if they are vouched for by existing keys
+ * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
+ * keyring (primarily used by the integrity subsystem to verify a kexec'ed
+ * kernel image and, possibly, the initramfs signature).
+ *
+ * Return: a bpf_key pointer with an invalid key pointer set from the
+ *         pre-determined ID on success, a NULL pointer otherwise
+ */
+__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+	struct bpf_key *bkey;
+
+	if (system_keyring_id_check(id) < 0)
+		return NULL;
+
+	bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
+	if (!bkey)
+		return NULL;
+
+	bkey->key = (struct key *)(unsigned long)id;
+	bkey->has_ref = false;
+
+	return bkey;
+}
+
+/**
+ * bpf_key_put - decrement key reference count if key is valid and free bpf_key
+ * @bkey: bpf_key structure
+ *
+ * Decrement the reference count of the key inside *bkey*, if the pointer
+ * is valid, and free *bkey*.
+ */
+__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
+{
+	if (bkey->has_ref)
+		key_put(bkey->key);
+
+	kfree(bkey);
+}
+
+/**
+ * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
+ * @data_p: data to verify
+ * @sig_p: signature of the data
+ * @trusted_keyring: keyring with keys trusted for signature verification
+ *
+ * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
+ * with keys in a keyring referenced by *trusted_keyring*.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+					   struct bpf_dynptr *sig_p,
+					   struct bpf_key *trusted_keyring)
+{
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
+	struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
+	struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
+	const void *data, *sig;
+	u32 data_len, sig_len;
+	int ret;
+
+	if (trusted_keyring->has_ref) {
+		/*
+		 * Do the permission check deferred in bpf_lookup_user_key().
+		 * See bpf_lookup_user_key() for more details.
+		 *
+		 * A call to key_task_permission() here would be redundant, as
+		 * it is already done by keyring_search() called by
+		 * find_asymmetric_key().
+		 */
+		ret = key_validate(trusted_keyring->key);
+		if (ret < 0)
+			return ret;
+	}
+
+	data_len = __bpf_dynptr_size(data_ptr);
+	data = __bpf_dynptr_data(data_ptr, data_len);
+	sig_len = __bpf_dynptr_size(sig_ptr);
+	sig = __bpf_dynptr_data(sig_ptr, sig_len);
+
+	return verify_pkcs7_signature(data, data_len, sig, sig_len,
+				      trusted_keyring->key,
+				      VERIFYING_BPF_SIGNATURE, NULL,
+				      NULL);
+#else
+	return -EOPNOTSUPP;
+#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+}
+#endif /* CONFIG_KEYS */
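A hedged BPF-side sketch tying the key kfuncs together in a sleepable
program (dynptr setup elided; the serial value is arbitrary):

	extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
	extern void bpf_key_put(struct bpf_key *bkey) __ksym;
	extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
					      struct bpf_dynptr *sig_p,
					      struct bpf_key *trusted_keyring) __ksym;

	static int verify_blob(struct bpf_dynptr *data, struct bpf_dynptr *sig,
			       __s32 serial)
	{
		struct bpf_key *key;
		int ret;

		key = bpf_lookup_user_key(serial, 0);
		if (!key)
			return -1;
		ret = bpf_verify_pkcs7_signature(data, sig, key);
		bpf_key_put(key);
		return ret;
	}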
+typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);
+
+enum bpf_task_work_state {
+	/* bpf_task_work is ready to be used */
+	BPF_TW_STANDBY = 0,
+	/* irq work scheduling in progress */
+	BPF_TW_PENDING,
+	/* task work scheduling in progress */
+	BPF_TW_SCHEDULING,
+	/* task work is scheduled successfully */
+	BPF_TW_SCHEDULED,
+	/* callback is running */
+	BPF_TW_RUNNING,
+	/* associated BPF map value is deleted */
+	BPF_TW_FREED,
+};
+
+struct bpf_task_work_ctx {
+	enum bpf_task_work_state state;
+	refcount_t refcnt;
+	struct callback_head work;
+	struct irq_work irq_work;
+	/* bpf_prog that schedules task work */
+	struct bpf_prog *prog;
+	/* task for which callback is scheduled */
+	struct task_struct *task;
+	/* the map and map value associated with this context */
+	struct bpf_map *map;
+	void *map_val;
+	enum task_work_notify_mode mode;
+	bpf_task_work_callback_t callback_fn;
+	struct rcu_head rcu;
+} __aligned(8);
+
+/* Actual type for struct bpf_task_work */
+struct bpf_task_work_kern {
+	struct bpf_task_work_ctx *ctx;
+};
+
+static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx)
+{
+	if (ctx->prog) {
+		bpf_prog_put(ctx->prog);
+		ctx->prog = NULL;
+	}
+	if (ctx->task) {
+		bpf_task_release(ctx->task);
+		ctx->task = NULL;
+	}
+}
+
+static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
+{
+	return refcount_inc_not_zero(&ctx->refcnt);
+}
+
+static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
+{
+	if (!refcount_dec_and_test(&ctx->refcnt))
+		return;
+
+	bpf_task_work_ctx_reset(ctx);
+
+	/* bpf_mem_free expects migration to be disabled */
+	migrate_disable();
+	bpf_mem_free(&bpf_global_ma, ctx);
+	migrate_enable();
+}
+
+static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
+{
+	/*
+	 * Scheduled task_work callback holds ctx ref, so if we successfully
+	 * cancelled, we put that ref on callback's behalf. If we couldn't
+	 * cancel, callback will inevitably run or has already completed
+	 * running, and it would have taken care of its ctx ref itself.
+	 */
+	if (task_work_cancel(ctx->task, &ctx->work))
+		bpf_task_work_ctx_put(ctx);
+}
+
+static void bpf_task_work_callback(struct callback_head *cb)
+{
+	struct bpf_task_work_ctx *ctx = container_of(cb, struct bpf_task_work_ctx, work);
+	enum bpf_task_work_state state;
+	u32 idx;
+	void *key;
+
+	/* Read lock is needed to protect ctx and map key/value access */
+	guard(rcu_tasks_trace)();
+	/*
+	 * This callback may start running before bpf_task_work_irq() switched to
+	 * SCHEDULED state, so handle both transition variants SCHEDULING|SCHEDULED -> RUNNING.
+	 */
+	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_RUNNING);
+	if (state == BPF_TW_SCHEDULED)
+		state = cmpxchg(&ctx->state, BPF_TW_SCHEDULED, BPF_TW_RUNNING);
+	if (state == BPF_TW_FREED) {
+		bpf_task_work_ctx_put(ctx);
+		return;
+	}
+
+	key = (void *)map_key_from_value(ctx->map, ctx->map_val, &idx);
+
+	migrate_disable();
+	ctx->callback_fn(ctx->map, key, ctx->map_val);
+	migrate_enable();
+
+	bpf_task_work_ctx_reset(ctx);
+	(void)cmpxchg(&ctx->state, BPF_TW_RUNNING, BPF_TW_STANDBY);
+
+	bpf_task_work_ctx_put(ctx);
+}
+
+static void bpf_task_work_irq(struct irq_work *irq_work)
+{
+	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
+	enum bpf_task_work_state state;
+	int err;
+
+	guard(rcu_tasks_trace)();
+
+	if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
+		bpf_task_work_ctx_put(ctx);
+		return;
+	}
+
+	err = task_work_add(ctx->task, &ctx->work, ctx->mode);
+	if (err) {
+		bpf_task_work_ctx_reset(ctx);
+		/*
+		 * try to switch back to STANDBY for another task_work reuse, but we might have
+		 * gone to FREED already, which is fine as we already cleaned up after ourselves
+		 */
+		(void)cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_STANDBY);
+		bpf_task_work_ctx_put(ctx);
+		return;
+	}
+
+	/*
+	 * It's technically possible for just scheduled task_work callback to
+	 * complete running by now, going SCHEDULING -> RUNNING and then
+	 * dropping its ctx refcount. Instead of capturing extra ref just to
+	 * protect below ctx->state access, we rely on RCU protection to
+	 * perform below SCHEDULING -> SCHEDULED attempt.
+	 */
+	state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
+	if (state == BPF_TW_FREED)
+		bpf_task_work_cancel(ctx); /* clean up if we switched into FREED state */
+}
+
+static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *tw,
+							 struct bpf_map *map)
+{
+	struct bpf_task_work_kern *twk = (void *)tw;
+	struct bpf_task_work_ctx *ctx, *old_ctx;
+
+	ctx = READ_ONCE(twk->ctx);
+	if (ctx)
+		return ctx;
+
+	ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	memset(ctx, 0, sizeof(*ctx));
+	refcount_set(&ctx->refcnt, 1); /* map's own ref */
+	ctx->state = BPF_TW_STANDBY;
+
+	old_ctx = cmpxchg(&twk->ctx, NULL, ctx);
+	if (old_ctx) {
+		/*
+		 * tw->ctx is set by concurrent BPF program, release allocated
+		 * memory and try to reuse already set context.
+		 */
+		bpf_mem_free(&bpf_global_ma, ctx);
+		return old_ctx;
+	}
+
+	return ctx; /* Success */
+}
+
+static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work *tw,
+							   struct bpf_map *map)
+{
+	struct bpf_task_work_ctx *ctx;
+
+	ctx = bpf_task_work_fetch_ctx(tw, map);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	/* try to get ref for task_work callback to hold */
+	if (!bpf_task_work_ctx_tryget(ctx))
+		return ERR_PTR(-EBUSY);
+
+	if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
+		/* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
+		bpf_task_work_ctx_put(ctx);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/*
+	 * If no process or bpffs is holding a reference to the map, no new callbacks should be
+	 * scheduled. This does not address any race or correctness issue, but rather is a policy
+	 * choice: dropping user references should stop everything.
+	 */
+	if (!atomic64_read(&map->usercnt)) {
+		/* drop ref we just got for task_work callback itself */
+		bpf_task_work_ctx_put(ctx);
+		/* transfer map's ref into cancel_and_free() */
+		bpf_task_work_cancel_and_free(tw);
+		return ERR_PTR(-EBUSY);
+	}
+
+	return ctx;
+}
+
+static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work *tw,
+				  struct bpf_map *map, bpf_task_work_callback_t callback_fn,
+				  struct bpf_prog_aux *aux, enum task_work_notify_mode mode)
+{
+	struct bpf_prog *prog;
+	struct bpf_task_work_ctx *ctx;
+	int err;
+
+	BTF_TYPE_EMIT(struct bpf_task_work);
+
+	prog = bpf_prog_inc_not_zero(aux->prog);
+	if (IS_ERR(prog))
+		return -EBADF;
+	task = bpf_task_acquire(task);
+	if (!task) {
+		err = -EBADF;
+		goto release_prog;
+	}
+
+	ctx = bpf_task_work_acquire_ctx(tw, map);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto release_all;
+	}
+
+	ctx->task = task;
+	ctx->callback_fn = callback_fn;
+	ctx->prog = prog;
+	ctx->mode = mode;
+	ctx->map = map;
+	ctx->map_val = (void *)tw - map->record->task_work_off;
+	init_task_work(&ctx->work, bpf_task_work_callback);
+	init_irq_work(&ctx->irq_work, bpf_task_work_irq);
+
+	irq_work_queue(&ctx->irq_work);
+	return 0;
+
+release_all:
+	bpf_task_release(task);
+release_prog:
+	bpf_prog_put(prog);
+	return err;
+}
+
+/**
+ * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
+					      void *map__map, bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
+}
+
+/**
+ * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
+					      void *map__map, bpf_task_work_callback_t callback,
+					      void *aux__prog)
+{
+	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
+}
+
+__bpf_kfunc_end_defs();
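A hedged BPF-side sketch of scheduling deferred work with the kfuncs above
(map, section and function names are invented for the example):

	struct elem {
		struct bpf_task_work tw;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 128);
		__type(key, int);
		__type(value, struct elem);
	} tw_map SEC(".maps");

	static int tw_cb(struct bpf_map *map, void *key, void *value)
	{
		/* runs later, in the target task's context */
		return 0;
	}

	SEC("tp_btf/sched_switch")
	int schedule_work(u64 *ctx)
	{
		struct task_struct *task = bpf_get_current_task_btf();
		int key = 0;
		struct elem *val = bpf_map_lookup_elem(&tw_map, &key);

		if (val)
			bpf_task_work_schedule_resume(task, &val->tw, &tw_map,
						      tw_cb, NULL);
		return 0;
	}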
+static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
+{
+	struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
+
+	bpf_task_work_cancel(ctx); /* this might put task_work callback's ref */
+	bpf_task_work_ctx_put(ctx); /* and here we put map's own ref that was transferred to us */
+}
+
+void bpf_task_work_cancel_and_free(void *val)
+{
+	struct bpf_task_work_kern *twk = val;
+	struct bpf_task_work_ctx *ctx;
+	enum bpf_task_work_state state;
+
+	ctx = xchg(&twk->ctx, NULL);
+	if (!ctx)
+		return;
+
+	state = xchg(&ctx->state, BPF_TW_FREED);
+	if (state == BPF_TW_SCHEDULED) {
+		/* run in irq_work to avoid locks in NMI */
+		init_irq_work(&ctx->irq_work, bpf_task_work_cancel_scheduled);
+		irq_work_queue(&ctx->irq_work);
+		return;
+	}
+
+	bpf_task_work_ctx_put(ctx); /* put bpf map's ref */
+}
+
 BTF_KFUNCS_START(generic_btf_ids)
 #ifdef CONFIG_CRASH_DUMP
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
@@ -3753,6 +4268,14 @@ BTF_ID_FLAGS(func, bpf_throw)
 #ifdef CONFIG_BPF_EVENTS
 BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
 #endif
+#ifdef CONFIG_KEYS
+BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
+BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
+#endif
+#endif
 BTF_KFUNCS_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
@@ -3834,6 +4357,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
 #endif
 BTF_ID_FLAGS(func, __bpf_trap)
 BTF_ID_FLAGS(func, bpf_strcmp);
+BTF_ID_FLAGS(func, bpf_strcasecmp);
 BTF_ID_FLAGS(func, bpf_strchr);
 BTF_ID_FLAGS(func, bpf_strchrnul);
 BTF_ID_FLAGS(func, bpf_strnchr);
@@ -3848,6 +4372,8 @@ BTF_ID_FLAGS(func, bpf_strnstr);
 BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
 #endif
 BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
kernel/bpf/liveness.c (new file, 733 lines)
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf_verifier.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+
+/*
+ * This file implements live stack slots analysis. After accumulating
+ * stack usage data, the analysis answers queries about whether a
+ * particular stack slot may be read by an instruction or any of its
+ * successors. This data is consumed by the verifier states caching
+ * mechanism to decide which stack slots are important when looking for a
+ * visited state corresponding to the current state.
+ *
+ * The analysis is call chain sensitive, meaning that data is collected
+ * and queried for tuples (call chain, subprogram instruction index).
+ * Such sensitivity allows identifying if some subprogram call always
+ * leads to writes in the caller's stack.
+ *
+ * The basic idea is as follows:
+ * - As the verifier accumulates a set of visited states, the analysis instance
+ *   accumulates a conservative estimate of stack slots that can be read
+ *   or must be written for each visited tuple (call chain, instruction index).
+ * - If several states happen to visit the same instruction with the same
+ *   call chain, stack usage information for the corresponding tuple is joined:
+ *   - "may_read" set represents a union of all possibly read slots
+ *     (any slot in "may_read" set might be read at or after the instruction);
+ *   - "must_write" set represents an intersection of all possibly written slots
+ *     (any slot in "must_write" set is guaranteed to be written by the instruction).
+ * - The analysis is split into two phases:
+ *   - read and write marks accumulation;
+ *   - read and write marks propagation.
+ * - The propagation phase is a textbook live variable data flow analysis:
+ *
+ *   state[cc, i].live_after = U [state[cc, s].live_before for s in insn_successors(i)]
+ *   state[cc, i].live_before =
+ *     (state[cc, i].live_after / state[cc, i].must_write) U state[cc, i].may_read
+ *
+ *   Where:
+ *   - `U` stands for set union;
+ *   - `/` stands for set difference;
+ *   - `cc` stands for a call chain;
+ *   - `i` and `s` are instruction indexes.
+ *
+ *   The above equations are computed for each call chain and instruction
+ *   index until state stops changing.
+ * - Additionally, in order to transfer "must_write" information from a
+ *   subprogram to call instructions invoking this subprogram,
+ *   the "must_write_acc" set is tracked for each (cc, i) tuple.
+ *   A set of stack slots that are guaranteed to be written by this
+ *   instruction or any of its successors (within the subprogram).
+ *   The equation for "must_write_acc" propagation looks as follows:
+ *
+ *   state[cc, i].must_write_acc =
+ *     ∩ [state[cc, s].must_write_acc for s in insn_successors(i)]
+ *     U state[cc, i].must_write
+ *
+ *   (An intersection of all "must_write_acc" for instruction successors
+ *   plus all "must_write" slots for the instruction itself.)
+ * - After the propagation phase completes for a subprogram, information from
+ *   (cc, 0) tuple (subprogram entry) is transferred to the caller's call chain:
+ *   - "must_write_acc" set is intersected with the call site's "must_write" set;
+ *   - "may_read" set is added to the call site's "may_read" set.
+ * - Any live stack queries must be taken after the propagation phase.
+ * - Accumulation and propagation phases can be entered multiple times,
+ *   at any point in time:
+ *   - "may_read" set only grows;
+ *   - "must_write" set only shrinks;
+ *   - for each visited verifier state with zero branches, all relevant
+ *     read and write marks are already recorded by the analysis instance.
+ *
+ * Technically, the analysis is facilitated by the following data structures:
+ * - Call chain: for a given verifier state, the call chain is a tuple of call
+ *   instruction indexes leading to the current subprogram plus the subprogram
+ *   entry point index.
+ * - Function instance: for a given call chain, for each instruction in
+ *   the current subprogram, a mapping between instruction index and a
+ *   set of "may_read", "must_write" and other marks accumulated for this
+ *   instruction.
+ * - A hash table mapping call chains to function instances.
+ */
+struct callchain {
+	u32 callsites[MAX_CALL_FRAMES]; /* instruction pointer for each frame */
+	/* cached subprog_info[*].start for functions owning the frames:
+	 * - sp_starts[curframe] used to get insn relative index within current function;
+	 * - sp_starts[0..current-1] used for fast callchain_frame_up().
+	 */
+	u32 sp_starts[MAX_CALL_FRAMES];
+	u32 curframe; /* depth of callsites and sp_starts arrays */
+};
+
+struct per_frame_masks {
+	u64 may_read;       /* stack slots that may be read by this instruction */
+	u64 must_write;     /* stack slots written by this instruction */
+	u64 must_write_acc; /* stack slots written by this instruction and its successors */
+	u64 live_before;    /* stack slots that may be read by this insn and its successors */
+};
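	/*
	 * Illustrative sketch only, not part of liveness.c: one propagation
	 * pass over a single function instance, restating the header-comment
	 * equations. insn_successors() is assumed to fill @succ and return
	 * the successor count.
	 */
	static bool propagate_pass(struct per_frame_masks *insn, u32 insn_cnt,
				   u32 (*insn_successors)(u32 idx, u32 succ[2]))
	{
		bool changed = false;
		u32 succ[2], n, i, j;

		for (i = 0; i < insn_cnt; i++) {
			u64 live_after = 0, live_before;

			n = insn_successors(i, succ);
			for (j = 0; j < n; j++)
				live_after |= insn[succ[j]].live_before;
			live_before = (live_after & ~insn[i].must_write) |
				      insn[i].may_read;
			if (live_before != insn[i].live_before) {
				insn[i].live_before = live_before;
				changed = true;
			}
		}
		return changed;
	}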
/*
|
||||
* A function instance created for a specific callchain.
|
||||
* Encapsulates read and write marks for each instruction in the function.
|
||||
* Marks are tracked for each frame in the callchain.
|
||||
*/
|
||||
struct func_instance {
|
||||
struct hlist_node hl_node;
|
||||
struct callchain callchain;
|
||||
u32 insn_cnt; /* cached number of insns in the function */
|
||||
bool updated;
|
||||
bool must_write_dropped;
|
||||
/* Per frame, per instruction masks, frames allocated lazily. */
|
||||
struct per_frame_masks *frames[MAX_CALL_FRAMES];
|
||||
/* For each instruction a flag telling if "must_write" had been initialized for it. */
|
||||
bool *must_write_set;
|
||||
};
|
||||
|
||||
struct live_stack_query {
|
||||
struct func_instance *instances[MAX_CALL_FRAMES]; /* valid in range [0..curframe] */
|
||||
u32 curframe;
|
||||
u32 insn_idx;
|
||||
};
|
||||
|
||||
struct bpf_liveness {
|
||||
DECLARE_HASHTABLE(func_instances, 8); /* maps callchain to func_instance */
|
||||
struct live_stack_query live_stack_query; /* cache to avoid repetitive ht lookups */
|
||||
/* Cached instance corresponding to env->cur_state, avoids per-instruction ht lookup */
|
||||
struct func_instance *cur_instance;
|
||||
/*
|
||||
* Below fields are used to accumulate stack write marks for instruction at
|
||||
* @write_insn_idx before submitting the marks to @cur_instance.
|
||||
*/
|
||||
u64 write_masks_acc[MAX_CALL_FRAMES];
|
||||
u32 write_insn_idx;
|
||||
};
|
||||
|
||||
/* Compute callchain corresponding to state @st at depth @frameno */
|
||||
static void compute_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st,
|
||||
struct callchain *callchain, u32 frameno)
|
||||
{
|
||||
struct bpf_subprog_info *subprog_info = env->subprog_info;
|
||||
u32 i;
|
||||
|
||||
memset(callchain, 0, sizeof(*callchain));
|
||||
for (i = 0; i <= frameno; i++) {
|
||||
callchain->sp_starts[i] = subprog_info[st->frame[i]->subprogno].start;
|
||||
if (i < st->curframe)
|
||||
callchain->callsites[i] = st->frame[i + 1]->callsite;
|
||||
}
|
||||
callchain->curframe = frameno;
|
||||
callchain->callsites[callchain->curframe] = callchain->sp_starts[callchain->curframe];
|
||||
}

static u32 hash_callchain(struct callchain *callchain)
{
	return jhash2(callchain->callsites, callchain->curframe, 0);
}

static bool same_callsites(struct callchain *a, struct callchain *b)
{
	int i;

	if (a->curframe != b->curframe)
		return false;
	for (i = a->curframe; i >= 0; i--)
		if (a->callsites[i] != b->callsites[i])
			return false;
	return true;
}

/*
 * Find existing or allocate new function instance corresponding to @callchain.
 * Instances are accumulated in env->liveness->func_instances and persist
 * until the end of the verification process.
 */
static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
					       struct callchain *callchain)
{
	struct bpf_liveness *liveness = env->liveness;
	struct bpf_subprog_info *subprog;
	struct func_instance *result;
	u32 subprog_sz, size, key;

	key = hash_callchain(callchain);
	hash_for_each_possible(liveness->func_instances, result, hl_node, key)
		if (same_callsites(&result->callchain, callchain))
			return result;

	subprog = bpf_find_containing_subprog(env, callchain->sp_starts[callchain->curframe]);
	subprog_sz = (subprog + 1)->start - subprog->start;
	size = sizeof(struct func_instance);
	result = kvzalloc(size, GFP_KERNEL_ACCOUNT);
	if (!result)
		return ERR_PTR(-ENOMEM);
	result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set),
					  GFP_KERNEL_ACCOUNT);
	if (!result->must_write_set)
		return ERR_PTR(-ENOMEM);
	memcpy(&result->callchain, callchain, sizeof(*callchain));
	result->insn_cnt = subprog_sz;
	hash_add(liveness->func_instances, &result->hl_node, key);
	return result;
}

static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
					     struct bpf_verifier_state *st,
					     u32 frameno)
{
	struct callchain callchain;

	compute_callchain(env, st, &callchain, frameno);
	return __lookup_instance(env, &callchain);
}

int bpf_stack_liveness_init(struct bpf_verifier_env *env)
{
	env->liveness = kvzalloc(sizeof(*env->liveness), GFP_KERNEL_ACCOUNT);
	if (!env->liveness)
		return -ENOMEM;
	hash_init(env->liveness->func_instances);
	return 0;
}

void bpf_stack_liveness_free(struct bpf_verifier_env *env)
{
	struct func_instance *instance;
	struct hlist_node *tmp;
	int bkt, i;

	if (!env->liveness)
		return;
	hash_for_each_safe(env->liveness->func_instances, bkt, tmp, instance, hl_node) {
		for (i = 0; i <= instance->callchain.curframe; i++)
			kvfree(instance->frames[i]);
		kvfree(instance->must_write_set);
		kvfree(instance);
	}
	kvfree(env->liveness);
}

/*
 * Convert absolute instruction index @insn_idx to an index relative
 * to start of the function corresponding to @instance.
 */
static int relative_idx(struct func_instance *instance, u32 insn_idx)
{
	return insn_idx - instance->callchain.sp_starts[instance->callchain.curframe];
}

static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
					       u32 frame, u32 insn_idx)
{
	if (!instance->frames[frame])
		return NULL;

	return &instance->frames[frame][relative_idx(instance, insn_idx)];
}

static struct per_frame_masks *alloc_frame_masks(struct bpf_verifier_env *env,
						 struct func_instance *instance,
						 u32 frame, u32 insn_idx)
{
	struct per_frame_masks *arr;

	if (!instance->frames[frame]) {
		arr = kvcalloc(instance->insn_cnt, sizeof(*arr), GFP_KERNEL_ACCOUNT);
		instance->frames[frame] = arr;
		if (!arr)
			return ERR_PTR(-ENOMEM);
	}
	return get_frame_masks(instance, frame, insn_idx);
}

void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env)
{
	env->liveness->cur_instance = NULL;
}

/* If @env->liveness->cur_instance is null, set it to instance corresponding to @env->cur_state. */
static int ensure_cur_instance(struct bpf_verifier_env *env)
{
	struct bpf_liveness *liveness = env->liveness;
	struct func_instance *instance;

	if (liveness->cur_instance)
		return 0;

	instance = lookup_instance(env, env->cur_state, env->cur_state->curframe);
	if (IS_ERR(instance))
		return PTR_ERR(instance);

	liveness->cur_instance = instance;
	return 0;
}

/* Accumulate may_read masks for @frame at @insn_idx */
static int mark_stack_read(struct bpf_verifier_env *env,
			   struct func_instance *instance, u32 frame, u32 insn_idx, u64 mask)
{
	struct per_frame_masks *masks;
	u64 new_may_read;

	masks = alloc_frame_masks(env, instance, frame, insn_idx);
	if (IS_ERR(masks))
		return PTR_ERR(masks);
	new_may_read = masks->may_read | mask;
	if (new_may_read != masks->may_read &&
	    ((new_may_read | masks->live_before) != masks->live_before))
		instance->updated = true;
	masks->may_read |= mask;
	return 0;
}

int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, u64 mask)
{
	int err;

	err = ensure_cur_instance(env);
	err = err ?: mark_stack_read(env, env->liveness->cur_instance, frame, insn_idx, mask);
	return err;
}

static void reset_stack_write_marks(struct bpf_verifier_env *env,
				    struct func_instance *instance, u32 insn_idx)
{
	struct bpf_liveness *liveness = env->liveness;
	int i;

	liveness->write_insn_idx = insn_idx;
	for (i = 0; i <= instance->callchain.curframe; i++)
		liveness->write_masks_acc[i] = 0;
}

int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
{
	struct bpf_liveness *liveness = env->liveness;
	int err;

	err = ensure_cur_instance(env);
	if (err)
		return err;

	reset_stack_write_marks(env, liveness->cur_instance, insn_idx);
	return 0;
}

void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, u64 mask)
{
	env->liveness->write_masks_acc[frame] |= mask;
}

static int commit_stack_write_marks(struct bpf_verifier_env *env,
				    struct func_instance *instance)
{
	struct bpf_liveness *liveness = env->liveness;
	u32 idx, frame, curframe, old_must_write;
	struct per_frame_masks *masks;
	u64 mask;

	if (!instance)
		return 0;

	curframe = instance->callchain.curframe;
	idx = relative_idx(instance, liveness->write_insn_idx);
	for (frame = 0; frame <= curframe; frame++) {
		mask = liveness->write_masks_acc[frame];
		/* avoid allocating frames for zero masks */
		if (mask == 0 && !instance->must_write_set[idx])
			continue;
		masks = alloc_frame_masks(env, instance, frame, liveness->write_insn_idx);
		if (IS_ERR(masks))
			return PTR_ERR(masks);
		old_must_write = masks->must_write;
		/*
		 * If instruction at this callchain is seen for a first time, set must_write equal
		 * to @mask. Otherwise take intersection with the previous value.
		 */
		if (instance->must_write_set[idx])
			mask &= old_must_write;
		if (old_must_write != mask) {
			masks->must_write = mask;
			instance->updated = true;
		}
		if (old_must_write & ~mask)
			instance->must_write_dropped = true;
	}
	instance->must_write_set[idx] = true;
	liveness->write_insn_idx = 0;
	return 0;
}
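
/*
 * Editorial example, not part of the original source: if an instruction is
 * first committed with write mask 0b11 (slots 0 and 1) and a later path
 * commits 0b01, must_write becomes 0b11 & 0b01 == 0b01; because bit 1 was
 * dropped, must_write_dropped is set and must_write_acc has to be recomputed.
 */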

/*
 * Merge stack writes marks in @env->liveness->write_masks_acc
 * with information already in @env->liveness->cur_instance.
 */
int bpf_commit_stack_write_marks(struct bpf_verifier_env *env)
{
	return commit_stack_write_marks(env, env->liveness->cur_instance);
}

static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callchain)
{
	char *buf_end = env->tmp_str_buf + sizeof(env->tmp_str_buf);
	char *buf = env->tmp_str_buf;
	int i;

	buf += snprintf(buf, buf_end - buf, "(");
	for (i = 0; i <= callchain->curframe; i++)
		buf += snprintf(buf, buf_end - buf, "%s%d", i ? "," : "", callchain->callsites[i]);
	snprintf(buf, buf_end - buf, ")");
	return env->tmp_str_buf;
}

static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
			    char *pfx, u32 frame, u32 insn_idx, u64 old, u64 new)
{
	u64 changed_bits = old ^ new;
	u64 new_ones = new & changed_bits;
	u64 new_zeros = ~new & changed_bits;

	if (!changed_bits)
		return;
	bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
	if (new_ones) {
		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
		bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
	}
	if (new_zeros) {
		bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
		bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
	}
	bpf_log(&env->log, "\n");
}

int bpf_jmp_offset(struct bpf_insn *insn)
{
	u8 code = insn->code;

	if (code == (BPF_JMP32 | BPF_JA))
		return insn->imm;
	return insn->off;
}
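
/*
 * Editorial note, not part of the original source: BPF_JMP32 | BPF_JA is the
 * "gotol" instruction, which carries a 32-bit jump offset in insn->imm;
 * all other jumps keep a 16-bit offset in insn->off.
 */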

__diag_push();
__diag_ignore_all("-Woverride-init", "Allow field initialization overrides for opcode_info_tbl");

inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
{
	static const struct opcode_info {
		bool can_jump;
		bool can_fallthrough;
	} opcode_info_tbl[256] = {
		[0 ... 255] = {.can_jump = false, .can_fallthrough = true},
#define _J(code, ...) \
	[BPF_JMP | code] = __VA_ARGS__, \
	[BPF_JMP32 | code] = __VA_ARGS__

		_J(BPF_EXIT,  {.can_jump = false, .can_fallthrough = false}),
		_J(BPF_JA,    {.can_jump = true,  .can_fallthrough = false}),
		_J(BPF_JEQ,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JNE,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JLT,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JLE,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JGT,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JGE,   {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JSGT,  {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JSGE,  {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JSLT,  {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JSLE,  {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JCOND, {.can_jump = true,  .can_fallthrough = true}),
		_J(BPF_JSET,  {.can_jump = true,  .can_fallthrough = true}),
#undef _J
	};
	struct bpf_insn *insn = &prog->insnsi[idx];
	const struct opcode_info *opcode_info;
	int i = 0, insn_sz;

	opcode_info = &opcode_info_tbl[BPF_CLASS(insn->code) | BPF_OP(insn->code)];
	insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
	if (opcode_info->can_fallthrough)
		succ[i++] = idx + insn_sz;

	if (opcode_info->can_jump)
		succ[i++] = idx + bpf_jmp_offset(insn) + 1;

	return i;
}
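
/*
 * Editorial example, not part of the original source: for a conditional jump
 * "if r1 == r2 goto +2" at index 7, both can_fallthrough and can_jump hold,
 * so succ = { 8, 10 } and the function returns 2; BPF_EXIT has no successors
 * and the function returns 0.
 */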

__diag_pop();

static struct func_instance *get_outer_instance(struct bpf_verifier_env *env,
						struct func_instance *instance)
{
	struct callchain callchain = instance->callchain;

	/* Adjust @callchain to represent callchain one frame up */
	callchain.callsites[callchain.curframe] = 0;
	callchain.sp_starts[callchain.curframe] = 0;
	callchain.curframe--;
	callchain.callsites[callchain.curframe] = callchain.sp_starts[callchain.curframe];
	return __lookup_instance(env, &callchain);
}

static u32 callchain_subprog_start(struct callchain *callchain)
{
	return callchain->sp_starts[callchain->curframe];
}

/*
 * Transfer @may_read and @must_write_acc marks from the first instruction of @instance,
 * to the call instruction in function instance calling @instance.
 */
static int propagate_to_outer_instance(struct bpf_verifier_env *env,
				       struct func_instance *instance)
{
	struct callchain *callchain = &instance->callchain;
	u32 this_subprog_start, callsite, frame;
	struct func_instance *outer_instance;
	struct per_frame_masks *insn;
	int err;

	this_subprog_start = callchain_subprog_start(callchain);
	outer_instance = get_outer_instance(env, instance);
	callsite = callchain->callsites[callchain->curframe - 1];

	reset_stack_write_marks(env, outer_instance, callsite);
	for (frame = 0; frame < callchain->curframe; frame++) {
		insn = get_frame_masks(instance, frame, this_subprog_start);
		if (!insn)
			continue;
		bpf_mark_stack_write(env, frame, insn->must_write_acc);
		err = mark_stack_read(env, outer_instance, frame, callsite, insn->live_before);
		if (err)
			return err;
	}
	commit_stack_write_marks(env, outer_instance);
	return 0;
}

static inline bool update_insn(struct bpf_verifier_env *env,
			       struct func_instance *instance, u32 frame, u32 insn_idx)
{
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	u64 new_before, new_after, must_write_acc;
	struct per_frame_masks *insn, *succ_insn;
	u32 succ_num, s, succ[2];
	bool changed;

	succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
	if (unlikely(succ_num == 0))
		return false;

	changed = false;
	insn = get_frame_masks(instance, frame, insn_idx);
	new_before = 0;
	new_after = 0;
	/*
	 * New "must_write_acc" is an intersection of all "must_write_acc"
	 * of successors plus all "must_write" slots of instruction itself.
	 */
	must_write_acc = U64_MAX;
	for (s = 0; s < succ_num; ++s) {
		succ_insn = get_frame_masks(instance, frame, succ[s]);
		new_after |= succ_insn->live_before;
		must_write_acc &= succ_insn->must_write_acc;
	}
	must_write_acc |= insn->must_write;
	/*
	 * New "live_before" is a union of all "live_before" of successors
	 * minus slots written by instruction plus slots read by instruction.
	 */
	new_before = (new_after & ~insn->must_write) | insn->may_read;
	changed |= new_before != insn->live_before;
	changed |= must_write_acc != insn->must_write_acc;
	if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
	    (insn->may_read || insn->must_write ||
	     insn_idx == callchain_subprog_start(&instance->callchain) ||
	     aux[insn_idx].prune_point)) {
		log_mask_change(env, &instance->callchain, "live",
				frame, insn_idx, insn->live_before, new_before);
		log_mask_change(env, &instance->callchain, "written",
				frame, insn_idx, insn->must_write_acc, must_write_acc);
	}
	insn->live_before = new_before;
	insn->must_write_acc = must_write_acc;
	return changed;
}
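
/*
 * Editorial worked example, not part of the original source: if the union of
 * the successors' live_before masks is 0b1100, and the instruction has
 * must_write == 0b0100 and may_read == 0b0001, then the new live_before is
 * (0b1100 & ~0b0100) | 0b0001 == 0b1001.
 */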

/* Fixed-point computation of @live_before and @must_write_acc marks */
static int update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
{
	u32 i, frame, po_start, po_end, cnt, this_subprog_start;
	struct callchain *callchain = &instance->callchain;
	int *insn_postorder = env->cfg.insn_postorder;
	struct bpf_subprog_info *subprog;
	struct per_frame_masks *insn;
	bool changed;
	int err;

	this_subprog_start = callchain_subprog_start(callchain);
	/*
	 * If must_write marks were updated must_write_acc needs to be reset
	 * (to account for the case when new must_write sets became smaller).
	 */
	if (instance->must_write_dropped) {
		for (frame = 0; frame <= callchain->curframe; frame++) {
			if (!instance->frames[frame])
				continue;

			for (i = 0; i < instance->insn_cnt; i++) {
				insn = get_frame_masks(instance, frame, this_subprog_start + i);
				insn->must_write_acc = 0;
			}
		}
	}

	subprog = bpf_find_containing_subprog(env, this_subprog_start);
	po_start = subprog->postorder_start;
	po_end = (subprog + 1)->postorder_start;
	cnt = 0;
	/* repeat until fixed point is reached */
	do {
		cnt++;
		changed = false;
		for (frame = 0; frame <= instance->callchain.curframe; frame++) {
			if (!instance->frames[frame])
				continue;

			for (i = po_start; i < po_end; i++)
				changed |= update_insn(env, instance, frame, insn_postorder[i]);
		}
	} while (changed);

	if (env->log.level & BPF_LOG_LEVEL2)
		bpf_log(&env->log, "%s live stack update done in %d iterations\n",
			fmt_callchain(env, callchain), cnt);

	/* transfer marks accumulated for outer frames to outer func instance (caller) */
	if (callchain->curframe > 0) {
		err = propagate_to_outer_instance(env, instance);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Prepare all callchains within @env->cur_state for querying.
 * This function should be called after each verifier.c:pop_stack()
 * and whenever verifier.c:do_check_insn() processes subprogram exit.
 * This would guarantee that visited verifier states with zero branches
 * have their bpf_mark_stack_{read,write}() effects propagated in
 * @env->liveness.
 */
int bpf_update_live_stack(struct bpf_verifier_env *env)
{
	struct func_instance *instance;
	int err, frame;

	bpf_reset_live_stack_callchain(env);
	for (frame = env->cur_state->curframe; frame >= 0; --frame) {
		instance = lookup_instance(env, env->cur_state, frame);
		if (IS_ERR(instance))
			return PTR_ERR(instance);

		if (instance->updated) {
			err = update_instance(env, instance);
			if (err)
				return err;
			instance->updated = false;
			instance->must_write_dropped = false;
		}
	}
	return 0;
}

static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 spi)
{
	struct per_frame_masks *masks;

	masks = get_frame_masks(instance, frameno, insn_idx);
	return masks && (masks->live_before & BIT(spi));
}

int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct live_stack_query *q = &env->liveness->live_stack_query;
	struct func_instance *instance;
	u32 frame;

	memset(q, 0, sizeof(*q));
	for (frame = 0; frame <= st->curframe; frame++) {
		instance = lookup_instance(env, st, frame);
		if (IS_ERR(instance))
			return PTR_ERR(instance);
		q->instances[frame] = instance;
	}
	q->curframe = st->curframe;
	q->insn_idx = st->insn_idx;
	return 0;
}

bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi)
{
	/*
	 * Slot is alive if it is read before q->st->insn_idx in current func instance,
	 * or if for some outer func instance:
	 * - alive before callsite if callsite calls callback, otherwise
	 * - alive after callsite
	 */
	struct live_stack_query *q = &env->liveness->live_stack_query;
	struct func_instance *instance, *curframe_instance;
	u32 i, callsite;
	bool alive;

	curframe_instance = q->instances[q->curframe];
	if (is_live_before(curframe_instance, q->insn_idx, frameno, spi))
		return true;

	for (i = frameno; i < q->curframe; i++) {
		callsite = curframe_instance->callchain.callsites[i];
		instance = q->instances[i];
		alive = bpf_calls_callback(env, callsite)
			? is_live_before(instance, callsite, frameno, spi)
			: is_live_before(instance, callsite + 1, frameno, spi);
		if (alive)
			return true;
	}

	return false;
}
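
/*
 * Editorial illustration, not part of the original source: with main() -> f()
 * and the query positioned inside f() (curframe == 1), a slot in main's frame
 * (frameno == 0) is reported alive if f() reads it before q->insn_idx, or if
 * main() still reads it after the call returns (checked at callsite + 1; for
 * callsites that invoke callbacks the callsite itself is checked).
 */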

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
@@ -165,7 +165,7 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
 	}
 
 	new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
-				   __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
+				   __GFP_ZERO | GFP_NOWAIT,
 				   map->numa_node);
 	if (!new)
 		return -ENOMEM;

diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
@@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
 		return "skb";
 	case BPF_DYNPTR_TYPE_XDP:
 		return "xdp";
+	case BPF_DYNPTR_TYPE_SKB_META:
+		return "skb_meta";
 	case BPF_DYNPTR_TYPE_INVALID:
 		return "<invalid>";
 	default:
@@ -540,19 +542,6 @@ static char slot_type_char[] = {
 	[STACK_IRQ_FLAG]	= 'f'
 };
 
-static void print_liveness(struct bpf_verifier_env *env,
-			   enum bpf_reg_liveness live)
-{
-	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
-		verbose(env, "_");
-	if (live & REG_LIVE_READ)
-		verbose(env, "r");
-	if (live & REG_LIVE_WRITTEN)
-		verbose(env, "w");
-	if (live & REG_LIVE_DONE)
-		verbose(env, "D");
-}
-
 #define UNUM_MAX_DECIMAL U16_MAX
 #define SNUM_MAX_DECIMAL S16_MAX
 #define SNUM_MIN_DECIMAL S16_MIN
@@ -770,7 +759,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 		if (!print_all && !reg_scratched(env, i))
 			continue;
 		verbose(env, " R%d", i);
-		print_liveness(env, reg->live);
 		verbose(env, "=");
 		print_reg_state(env, state, reg);
 	}
@@ -803,9 +791,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 				break;
 			types_buf[j] = '\0';
 
-			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
-			print_liveness(env, reg->live);
-			verbose(env, "=%s", types_buf);
+			verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
 			print_reg_state(env, state, reg);
 			break;
 		case STACK_DYNPTR:
@@ -814,7 +800,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 			reg = &state->stack[i].spilled_ptr;
 
 			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
-			print_liveness(env, reg->live);
 			verbose(env, "=dynptr_%s(", dynptr_type_str(reg->dynptr.type));
 			if (reg->id)
 				verbose_a("id=%d", reg->id);
@@ -829,9 +814,8 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 			if (!reg->ref_obj_id)
 				continue;
 
-			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
-			print_liveness(env, reg->live);
-			verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
+			verbose(env, " fp%d=iter_%s(ref_id=%d,state=%s,depth=%u)",
+				(-i - 1) * BPF_REG_SIZE,
 				iter_type_str(reg->iter.btf, reg->iter.btf_id),
 				reg->ref_obj_id, iter_state_str(reg->iter.state),
 				reg->iter.depth);
@@ -839,9 +823,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
 		case STACK_MISC:
 		case STACK_ZERO:
 		default:
-			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
-			print_liveness(env, reg->live);
-			verbose(env, "=%s", types_buf);
+			verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
 			break;
 		}
 	}

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
@@ -736,7 +736,7 @@ static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress)
 	/* Defer barriers into worker to let the rest of map memory to be freed */
 	memset(ma, 0, sizeof(*ma));
 	INIT_WORK(&copy->work, free_mem_alloc_deferred);
-	queue_work(system_unbound_wq, &copy->work);
+	queue_work(system_dfl_wq, &copy->work);
 }
 
 void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
@@ -646,7 +646,15 @@ static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 }
 
 /* Called from syscall */
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					    void *value, u64 flags)
+{
+	return bpf_stackmap_extract(map, key, value, true);
+}
+
+/* Called from syscall */
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+			 bool delete)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct stack_map_bucket *bucket, *old_bucket;
@@ -663,7 +671,10 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 	memcpy(value, bucket->data, trace_len);
 	memset(value + trace_len, 0, map->value_size - trace_len);
 
-	old_bucket = xchg(&smap->buckets[id], bucket);
+	if (delete)
+		old_bucket = bucket;
+	else
+		old_bucket = xchg(&smap->buckets[id], bucket);
 	if (old_bucket)
 		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
 	return 0;
@@ -754,6 +765,7 @@ const struct bpf_map_ops stack_trace_map_ops = {
 	.map_free = stack_map_free,
 	.map_get_next_key = stack_map_get_next_key,
 	.map_lookup_elem = stack_map_lookup_elem,
+	.map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 	.map_check_btf = map_check_no_btf,

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  */
+#include <crypto/sha2.h>
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf_trace.h>
@@ -38,6 +39,7 @@
 #include <linux/tracepoint.h>
 #include <linux/overflow.h>
 #include <linux/cookie.h>
+#include <linux/verification.h>
 
 #include <net/netfilter/nf_bpf_link.h>
 #include <net/netkit.h>
@@ -318,7 +320,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
-		err = bpf_stackmap_copy(map, key, value);
+		err = bpf_stackmap_extract(map, key, value, false);
 	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
@@ -672,6 +674,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to release */
 			break;
 		default:
@@ -725,6 +728,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			/* Nothing to acquire */
 			break;
 		default:
@@ -783,6 +787,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
 	bpf_wq_cancel_and_free(obj + rec->wq_off);
 }
 
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
+{
+	if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
+		return;
+	bpf_task_work_cancel_and_free(obj + rec->task_work_off);
+}
+
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 {
 	const struct btf_field *fields;
@@ -807,6 +818,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 		case BPF_WORKQUEUE:
 			bpf_wq_cancel_and_free(field_ptr);
 			break;
+		case BPF_TASK_WORK:
+			bpf_task_work_cancel_and_free(field_ptr);
+			break;
 		case BPF_KPTR_UNREF:
 			WRITE_ONCE(*(u64 *)field_ptr, 0);
 			break;
@@ -860,6 +874,7 @@ static void bpf_map_free(struct bpf_map *map)
 	 * the free of values or special fields allocated from bpf memory
 	 * allocator.
	 */
+	kfree(map->excl_prog_sha);
 	migrate_disable();
 	map->ops->map_free(map);
 	migrate_enable();
@@ -905,7 +920,7 @@ static void bpf_map_free_in_work(struct bpf_map *map)
 	/* Avoid spawning kworkers, since they all might contend
 	 * for the same mutex like slab_mutex.
 	 */
-	queue_work(system_unbound_wq, &map->work);
+	queue_work(system_dfl_wq, &map->work);
 }
 
 static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
@@ -1237,7 +1252,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 
 	map->record = btf_parse_fields(btf, value_type,
 				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
+				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
+				       BPF_TASK_WORK,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1269,6 +1285,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 			break;
 		case BPF_TIMER:
 		case BPF_WORKQUEUE:
+		case BPF_TASK_WORK:
 			if (map->map_type != BPF_MAP_TYPE_HASH &&
 			    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 			    map->map_type != BPF_MAP_TYPE_ARRAY) {
@@ -1338,9 +1355,9 @@ static bool bpf_net_capable(void)
 	return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
+#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
 /* called via syscall */
-static int map_create(union bpf_attr *attr, bool kernel)
+static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 {
 	const struct bpf_map_ops *ops;
 	struct bpf_token *token = NULL;
@@ -1534,7 +1551,29 @@ static int map_create(union bpf_attr *attr, bool kernel)
 			attr->btf_vmlinux_value_type_id;
 	}
 
-	err = security_bpf_map_create(map, attr, token, kernel);
+	if (attr->excl_prog_hash) {
+		bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);
+
+		if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
+			err = -EINVAL;
+			goto free_map;
+		}
+
+		map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+		if (!map->excl_prog_sha) {
+			err = -ENOMEM;
+			goto free_map;
+		}
+
+		if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) {
+			err = -EFAULT;
+			goto free_map;
+		}
+	} else if (attr->excl_prog_hash_size) {
+		return -EINVAL;
+	}
+
+	err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
 	if (err)
 		goto free_map_sec;
 
@@ -1627,7 +1666,8 @@ struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
 }
 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
 
-int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+				bool delete)
 {
 	return -ENOTSUPP;
 }
@@ -2158,7 +2198,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
-		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		if (!bpf_map_is_offloaded(map)) {
 			bpf_disable_instrumentation();
 			rcu_read_lock();
@@ -2761,8 +2802,44 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 	}
 }
 
+static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr,
+				     bool is_kernel)
+{
+	bpfptr_t usig = make_bpfptr(attr->signature, is_kernel);
+	struct bpf_dynptr_kern sig_ptr, insns_ptr;
+	struct bpf_key *key = NULL;
+	void *sig;
+	int err = 0;
+
+	if (system_keyring_id_check(attr->keyring_id) == 0)
+		key = bpf_lookup_system_key(attr->keyring_id);
+	else
+		key = bpf_lookup_user_key(attr->keyring_id, 0);
+
+	if (!key)
+		return -EINVAL;
+
+	sig = kvmemdup_bpfptr(usig, attr->signature_size);
+	if (IS_ERR(sig)) {
+		bpf_key_put(key);
+		return -ENOMEM;
+	}
+
+	bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0,
+			attr->signature_size);
+	bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0,
+			prog->len * sizeof(struct bpf_insn));
+
+	err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr,
+					 (struct bpf_dynptr *)&sig_ptr, key);
+
+	bpf_key_put(key);
+	kvfree(sig);
+	return err;
+}
+
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
+#define BPF_PROG_LOAD_LAST_FIELD keyring_id
 
 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 {
@@ -2926,6 +3003,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
 	prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
 
+	if (attr->signature) {
+		err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel);
+		if (err)
+			goto free_prog;
+	}
+
 	prog->orig_prog = NULL;
 	prog->jited = 0;
 
@@ -5161,6 +5244,9 @@ static int bpf_map_get_info_by_fd(struct file *file,
 	info_len = min_t(u32, sizeof(info), info_len);
 
 	memset(&info, 0, sizeof(info));
+	if (copy_from_user(&info, uinfo, info_len))
+		return -EFAULT;
+
 	info.type = map->map_type;
 	info.id = map->id;
 	info.key_size = map->key_size;
@@ -5185,6 +5271,25 @@ static int bpf_map_get_info_by_fd(struct file *file,
 			return err;
 	}
 
+	if (info.hash) {
+		char __user *uhash = u64_to_user_ptr(info.hash);
+
+		if (!map->ops->map_get_hash)
+			return -EINVAL;
+
+		if (info.hash_size != SHA256_DIGEST_SIZE)
+			return -EINVAL;
+
+		err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha);
+		if (err != 0)
+			return err;
+
+		if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0)
+			return -EFAULT;
+	} else if (info.hash_size) {
+		return -EINVAL;
+	}
+
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
 		return -EFAULT;
@@ -6008,7 +6113,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:
-		err = map_create(&attr, uattr.is_kernel);
+		err = map_create(&attr, uattr);
 		break;
 	case BPF_MAP_LOOKUP_ELEM:
 		err = map_lookup_elem(&attr);

diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
@@ -116,31 +116,55 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
 	return TNUM(v & ~mu, mu);
 }
 
-/* Generate partial products by multiplying each bit in the multiplier (tnum a)
- * with the multiplicand (tnum b), and add the partial products after
- * appropriately bit-shifting them. Instead of directly performing tnum addition
- * on the generated partial products, equivalenty, decompose each partial
- * product into two tnums, consisting of the value-sum (acc_v) and the
- * mask-sum (acc_m) and then perform tnum addition on them. The following paper
- * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
+/* Perform long multiplication, iterating through the bits in a using rshift:
+ * - if LSB(a) is a known 0, keep current accumulator
+ * - if LSB(a) is a known 1, add b to current accumulator
+ * - if LSB(a) is unknown, take a union of the above cases.
+ *
+ * For example:
+ *
+ *              acc_0:     acc_1:
+ *
+ *   11 *   ->   11 *  ->   11 *  ->  union(0011, 1001) == x0x1
+ *   x1           01         11
+ * ------       ------     ------
+ *     11           11         11
+ *    xx           00         11
+ * ------       ------     ------
+ *   ????         0011       1001
  */
 struct tnum tnum_mul(struct tnum a, struct tnum b)
 {
-	u64 acc_v = a.value * b.value;
-	struct tnum acc_m = TNUM(0, 0);
+	struct tnum acc = TNUM(0, 0);
 
 	while (a.value || a.mask) {
 		/* LSB of tnum a is a certain 1 */
 		if (a.value & 1)
-			acc_m = tnum_add(acc_m, TNUM(0, b.mask));
+			acc = tnum_add(acc, b);
 		/* LSB of tnum a is uncertain */
-		else if (a.mask & 1)
-			acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
+		else if (a.mask & 1) {
+			/* acc = tnum_union(acc_0, acc_1), where acc_0 and
+			 * acc_1 are partial accumulators for cases
+			 * LSB(a) = certain 0 and LSB(a) = certain 1.
+			 * acc_0 = acc + 0 * b = acc.
+			 * acc_1 = acc + 1 * b = tnum_add(acc, b).
+			 */
+
+			acc = tnum_union(acc, tnum_add(acc, b));
+		}
 		/* Note: no case for LSB is certain 0 */
 		a = tnum_rshift(a, 1);
 		b = tnum_lshift(b, 1);
 	}
-	return tnum_add(TNUM(acc_v, 0), acc_m);
+	return acc;
+}
+
+bool tnum_overlap(struct tnum a, struct tnum b)
+{
+	u64 mu;
+
+	mu = ~a.mask & ~b.mask;
+	return (a.value & mu) == (b.value & mu);
 }
 
 /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
@@ -155,6 +179,19 @@ struct tnum tnum_intersect(struct tnum a, struct tnum b)
 	return TNUM(v & ~mu, mu);
 }
 
+/* Returns a tnum with the uncertainty from both a and b, and in addition, new
+ * uncertainty at any position that a and b disagree. This represents a
+ * superset of the union of the concrete sets of both a and b. Despite the
+ * overapproximation, it is optimal.
+ */
+struct tnum tnum_union(struct tnum a, struct tnum b)
+{
+	u64 v = a.value & b.value;
+	u64 mu = (a.value ^ b.value) | a.mask | b.mask;
+
+	return TNUM(v & ~mu, mu);
+}
+
 struct tnum tnum_cast(struct tnum a, u8 size)
 {
 	a.value &= (1ULL << (size * 8)) - 1;

diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
@@ -899,8 +899,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
 static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
-	rcu_read_lock();
-	migrate_disable();
+	rcu_read_lock_dont_migrate();
 
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
@@ -949,8 +948,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
 
 	update_prog_stats(prog, start);
 	this_cpu_dec(*(prog->active));
-	migrate_enable();
-	rcu_read_unlock();
+	rcu_read_unlock_migrate();
 }
 
 static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
@@ -960,8 +958,7 @@ static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
 	/* Runtime stats are exported via actual BPF_LSM_CGROUP
 	 * programs, not the shims.
 	 */
-	rcu_read_lock();
-	migrate_disable();
+	rcu_read_lock_dont_migrate();
 
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
@@ -974,8 +971,7 @@ static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
 {
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
-	migrate_enable();
-	rcu_read_unlock();
+	rcu_read_unlock_migrate();
 }
 
 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
@@ -1033,8 +1029,7 @@ static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
 				    struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
-	rcu_read_lock();
-	migrate_disable();
+	rcu_read_lock_dont_migrate();
 
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
@@ -1048,8 +1043,7 @@ static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	migrate_enable();
-	rcu_read_unlock();
+	rcu_read_unlock_migrate();
 }
 
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)

[one file's diff omitted here: it was too large to display in the source]
@ -6481,15 +6481,15 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
|
|||
}
|
||||
|
||||
/*
|
||||
* cgroup_get_from_id : get the cgroup associated with cgroup id
|
||||
* __cgroup_get_from_id : get the cgroup associated with cgroup id
|
||||
* @id: cgroup id
|
||||
* On success return the cgrp or ERR_PTR on failure
|
||||
* Only cgroups within current task's cgroup NS are valid.
|
||||
* There are no cgroup NS restrictions.
|
||||
*/
|
||||
struct cgroup *cgroup_get_from_id(u64 id)
|
||||
struct cgroup *__cgroup_get_from_id(u64 id)
|
||||
{
|
||||
struct kernfs_node *kn;
|
||||
struct cgroup *cgrp, *root_cgrp;
|
||||
struct cgroup *cgrp;
|
||||
|
||||
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
|
||||
if (!kn)
|
||||
|
|
@ -6511,6 +6511,22 @@ struct cgroup *cgroup_get_from_id(u64 id)
|
|||
|
||||
if (!cgrp)
|
||||
return ERR_PTR(-ENOENT);
|
||||
return cgrp;
|
||||
}
|
||||
|
||||
/*
|
||||
* cgroup_get_from_id : get the cgroup associated with cgroup id
|
||||
* @id: cgroup id
|
||||
* On success return the cgrp or ERR_PTR on failure
|
||||
* Only cgroups within current task's cgroup NS are valid.
|
||||
*/
|
||||
struct cgroup *cgroup_get_from_id(u64 id)
|
||||
{
|
||||
struct cgroup *cgrp, *root_cgrp;
|
||||
|
||||
cgrp = __cgroup_get_from_id(id);
|
||||
if (IS_ERR(cgrp))
|
||||
return cgrp;
|
||||
|
||||
root_cgrp = current_cgns_cgroup_dfl();
|
||||
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
|
||||
|
|
|
|||
|
|
@ -11245,6 +11245,10 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
|
|||
if (prog->kprobe_override && !is_kprobe)
|
||||
return -EINVAL;
|
||||
|
||||
/* Writing to context allowed only for uprobes. */
|
||||
if (prog->aux->kprobe_write_ctx && !is_uprobe)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_tracepoint || is_syscall_tp) {
|
||||
int off = trace_event_get_offsets(event->tp_event);
|
||||
|
||||
|
|
|
|||
|
|
@ -2765,6 +2765,13 @@ static void handle_swbp(struct pt_regs *regs)
|
|||
|
||||
handler_chain(uprobe, regs);
|
||||
|
||||
/*
|
||||
* If user decided to take execution elsewhere, it makes little sense
|
||||
* to execute the original instruction, so let's skip it.
|
||||
*/
|
||||
if (instruction_pointer(regs) != bp_vaddr)
|
||||
goto out;
|
||||
|
||||
/* Try to optimize after first hit. */
|
||||
arch_uprobe_optimize(&uprobe->arch, bp_vaddr);
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@
|
|||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/key.h>
|
||||
#include <linux/verification.h>
|
||||
#include <linux/namei.h>
|
||||
|
||||
#include <net/bpf_sk_storage.h>
|
||||
|
|
@ -1241,188 +1240,6 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
|
|||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KEYS
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
/**
|
||||
* bpf_lookup_user_key - lookup a key by its serial
|
||||
* @serial: key handle serial number
|
||||
* @flags: lookup-specific flags
|
||||
*
|
||||
* Search a key with a given *serial* and the provided *flags*.
|
||||
* If found, increment the reference count of the key by one, and
|
||||
* return it in the bpf_key structure.
|
||||
*
|
||||
* The bpf_key structure must be passed to bpf_key_put() when done
|
||||
* with it, so that the key reference count is decremented and the
|
||||
* bpf_key structure is freed.
|
||||
*
|
||||
* Permission checks are deferred to the time the key is used by
|
||||
* one of the available key-specific kfuncs.
|
||||
*
|
||||
* Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
|
||||
* special keyring (e.g. session keyring), if it doesn't yet exist.
|
||||
* Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
|
||||
* for the key construction, and to retrieve uninstantiated keys (keys
|
||||
* without data attached to them).
|
||||
*
|
||||
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
|
||||
* NULL pointer otherwise.
|
||||
*/
|
||||
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
|
||||
{
|
||||
key_ref_t key_ref;
|
||||
struct bpf_key *bkey;
|
||||
|
||||
if (flags & ~KEY_LOOKUP_ALL)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Permission check is deferred until the key is used, as the
|
||||
* intent of the caller is unknown here.
|
||||
*/
|
||||
key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
|
||||
if (IS_ERR(key_ref))
|
||||
return NULL;
|
||||
|
||||
bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
|
||||
if (!bkey) {
|
||||
key_put(key_ref_to_ptr(key_ref));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bkey->key = key_ref_to_ptr(key_ref);
|
||||
bkey->has_ref = true;
|
||||
|
||||
return bkey;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_lookup_system_key - lookup a key by a system-defined ID
|
||||
* @id: key ID
|
||||
*
|
||||
* Obtain a bpf_key structure with a key pointer set to the passed key ID.
|
||||
* The key pointer is marked as invalid, to prevent bpf_key_put() from
|
||||
* attempting to decrement the key reference count on that pointer. The key
|
||||
* pointer set in such way is currently understood only by
|
||||
* verify_pkcs7_signature().
|
||||
*
|
||||
* Set *id* to one of the values defined in include/linux/verification.h:
|
||||
* 0 for the primary keyring (immutable keyring of system keys);
|
||||
* VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
|
||||
* (where keys can be added only if they are vouched for by existing keys
|
||||
* in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
|
||||
* keyring (primarily used by the integrity subsystem to verify a kexec'ed
|
||||
* kerned image and, possibly, the initramfs signature).
|
||||
*
|
||||
* Return: a bpf_key pointer with an invalid key pointer set from the
|
||||
* pre-determined ID on success, a NULL pointer otherwise
|
||||
*/
|
||||
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
|
||||
{
|
||||
struct bpf_key *bkey;
|
||||
|
||||
if (system_keyring_id_check(id) < 0)
|
||||
return NULL;
|
||||
|
||||
bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
|
||||
if (!bkey)
|
||||
return NULL;
|
||||
|
||||
bkey->key = (struct key *)(unsigned long)id;
|
||||
bkey->has_ref = false;
|
||||
|
||||
return bkey;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_key_put - decrement key reference count if key is valid and free bpf_key
|
||||
* @bkey: bpf_key structure
|
||||
*
|
||||
* Decrement the reference count of the key inside *bkey*, if the pointer
|
||||
* is valid, and free *bkey*.
|
||||
*/
|
||||
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
|
||||
{
|
||||
if (bkey->has_ref)
|
||||
key_put(bkey->key);
|
||||
|
||||
kfree(bkey);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
|
||||
/**
|
||||
* bpf_verify_pkcs7_signature - verify a PKCS#7 signature
|
||||
* @data_p: data to verify
|
||||
* @sig_p: signature of the data
|
||||
* @trusted_keyring: keyring with keys trusted for signature verification
|
||||
*
|
||||
* Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
|
||||
* with keys in a keyring referenced by *trusted_keyring*.
|
||||
*
|
||||
* Return: 0 on success, a negative value on error.
|
||||
*/
|
||||
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
|
||||
struct bpf_dynptr *sig_p,
|
||||
struct bpf_key *trusted_keyring)
|
||||
{
|
||||
struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
|
||||
struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
|
||||
const void *data, *sig;
|
||||
u32 data_len, sig_len;
|
||||
int ret;
|
||||
|
||||
if (trusted_keyring->has_ref) {
|
||||
/*
|
||||
* Do the permission check deferred in bpf_lookup_user_key().
|
||||
* See bpf_lookup_user_key() for more details.
|
||||
*
|
||||
* A call to key_task_permission() here would be redundant, as
|
||||
* it is already done by keyring_search() called by
|
||||
* find_asymmetric_key().
|
||||
*/
|
||||
ret = key_validate(trusted_keyring->key);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
data_len = __bpf_dynptr_size(data_ptr);
|
||||
data = __bpf_dynptr_data(data_ptr, data_len);
|
||||
sig_len = __bpf_dynptr_size(sig_ptr);
|
||||
sig = __bpf_dynptr_data(sig_ptr, sig_len);
|
||||
|
||||
return verify_pkcs7_signature(data, data_len, sig, sig_len,
|
||||
trusted_keyring->key,
|
||||
VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
|
||||
NULL);
|
||||
}
|
||||
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
BTF_KFUNCS_START(key_sig_kfunc_set)
|
||||
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
|
||||
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
|
||||
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
|
||||
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
|
||||
#endif
|
||||
BTF_KFUNCS_END(key_sig_kfunc_set)
|
||||
|
||||
static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &key_sig_kfunc_set,
|
||||
};
|
||||
|
||||
static int __init bpf_key_sig_kfuncs_init(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
|
||||
&bpf_key_sig_kfunc_set);
|
||||
}
|
||||
|
||||
late_initcall(bpf_key_sig_kfuncs_init);
|
||||
#endif /* CONFIG_KEYS */
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
|
|
@ -1521,8 +1338,6 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
|
|||
{
|
||||
if (off < 0 || off >= sizeof(struct pt_regs))
|
||||
return false;
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
/*
|
||||
|
|
@ -1532,6 +1347,9 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
|
|||
if (off + size > sizeof(struct pt_regs))
|
||||
return false;
|
||||
|
||||
if (type == BPF_WRITE)
|
||||
prog->aux->kprobe_write_ctx = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2728,20 +2546,25 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
|
|||
struct pt_regs *regs;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* graph tracer framework ensures we won't migrate, so there is no need
|
||||
* to use migrate_disable for bpf_prog_run again. The check here just for
|
||||
* __this_cpu_inc_return.
|
||||
*/
|
||||
cant_sleep();
|
||||
|
||||
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
|
||||
bpf_prog_inc_misses_counter(link->link.prog);
|
||||
err = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
|
||||
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
|
||||
err = bpf_prog_run(link->link.prog, regs);
|
||||
bpf_reset_run_ctx(old_run_ctx);
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
|
||||
out:
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
|
|
@ -2913,6 +2736,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
|
|||
if (!is_kprobe_multi(prog))
|
||||
return -EINVAL;
|
||||
|
||||
/* Writing to context is not allowed for kprobes. */
|
||||
if (prog->aux->kprobe_write_ctx)
|
||||
return -EINVAL;
|
||||
|
||||
flags = attr->link_create.kprobe_multi.flags;
|
||||
if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
|
||||
return -EINVAL;
|
||||
|
|
|
|||
|
|
@ -524,27 +524,27 @@ __bpf_kfunc int bpf_fentry_test1(int a)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
|
||||
|
||||
int noinline bpf_fentry_test2(int a, u64 b)
|
||||
noinline int bpf_fentry_test2(int a, u64 b)
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test3(char a, int b, u64 c)
|
||||
noinline int bpf_fentry_test3(char a, int b, u64 c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
|
||||
noinline int bpf_fentry_test4(void *a, char b, int c, u64 d)
|
||||
{
|
||||
return (long)a + b + c + d;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
|
||||
noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
|
||||
{
|
||||
return a + (long)b + c + d + e;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
|
||||
noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
|
||||
{
|
||||
return a + (long)b + c + d + (long)e + f;
|
||||
}
|
||||
|
|
@ -553,13 +553,13 @@ struct bpf_fentry_test_t {
|
|||
struct bpf_fentry_test_t *a;
|
||||
};
|
||||
|
||||
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
|
||||
noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg)
|
||||
{
|
||||
asm volatile ("": "+r"(arg));
|
||||
asm volatile ("" : "+r"(arg));
|
||||
return (long)arg;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
|
||||
noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg)
|
||||
{
|
||||
return (long)arg->a;
|
||||
}
|
||||
|
|
@ -569,12 +569,12 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
|
|||
return *a;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_test10(const void *a)
|
||||
noinline int bpf_fentry_test10(const void *a)
|
||||
{
|
||||
return (long)a;
|
||||
}
|
||||
|
||||
void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
|
||||
noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
@ -598,7 +598,7 @@ __bpf_kfunc int bpf_modify_return_test_tp(int nonce)
|
|||
return nonce;
|
||||
}
|
||||
|
||||
int noinline bpf_fentry_shadow_test(int a)
|
||||
noinline int bpf_fentry_shadow_test(int a)
|
||||
{
|
||||
return a + 1;
|
||||
}
|
||||
|
|
@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
|
|||
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
|
||||
void *data;
|
||||
|
||||
if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
|
||||
if (user_size > PAGE_SIZE - headroom - tailroom)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
size = SKB_DATA_ALIGN(size);
|
||||
|
|
@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|||
kattr->test.cpu || kattr->test.batch_size)
|
||||
return -EINVAL;
|
||||
|
||||
if (size < ETH_HLEN)
|
||||
return -EINVAL;
|
||||
|
||||
data = bpf_test_init(kattr, kattr->test.data_size_in,
|
||||
size, NET_SKB_PAD + NET_IP_ALIGN,
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
|
||||
|
|
@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|||
{
|
||||
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
|
||||
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
|
||||
u32 linear_sz = kattr->test.data_size_in;
|
||||
u32 batch_size = kattr->test.batch_size;
|
||||
u32 retval = 0, duration, max_data_sz;
|
||||
u32 size = kattr->test.data_size_in;
|
||||
u32 headroom = XDP_PACKET_HEADROOM;
|
||||
u32 repeat = kattr->test.repeat;
|
||||
struct netdev_rx_queue *rxqueue;
|
||||
|
|
@@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	if (ctx) {
 		/* There can't be user provided data before the meta data */
-		if (ctx->data_meta || ctx->data_end != size ||
+		if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
 		    ctx->data > ctx->data_end ||
 		    unlikely(xdp_metalen_invalid(ctx->data)) ||
 		    (do_live && (kattr->test.data_out || kattr->test.ctx_out)))
 			goto free_ctx;
 		/* Meta data is allocated from the headroom */
 		headroom -= ctx->data;
+
+		meta_sz = ctx->data;
+		linear_sz = ctx->data_end;
 	}
 
-	max_data_sz = PAGE_SIZE - headroom - tailroom;
-	if (size > max_data_sz) {
-		/* disallow live data mode for jumbo frames */
-		if (do_live)
-			goto free_ctx;
-		size = max_data_sz;
-	}
+	max_linear_sz = PAGE_SIZE - headroom - tailroom;
+	linear_sz = min_t(u32, linear_sz, max_linear_sz);
+
+	/* disallow live data mode for jumbo frames */
+	if (do_live && kattr->test.data_size_in > linear_sz)
+		goto free_ctx;
+
+	if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
+		return -EINVAL;
 
-	data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
+	data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
 	if (IS_ERR(data)) {
 		ret = PTR_ERR(data);
 		goto free_ctx;
 	}
 
 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-	rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+	rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
 	xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
-	xdp_prepare_buff(&xdp, data, headroom, size, true);
+	xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
 	sinfo = xdp_get_shared_info_from_buff(&xdp);
 
 	ret = xdp_convert_md_to_buff(ctx, &xdp);
 	if (ret)
 		goto free_data;
 
+	size = linear_sz;
 	if (unlikely(kattr->test.data_size_in > size)) {
 		void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
 
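The reworked context checks above change what BPF_PROG_TEST_RUN expects for XDP: when a ctx is supplied, ctx->data now carries the metadata size and ctx->data_end the size of the linear area, with the remainder of data_in landing in fragments. A hedged userspace sketch under those assumptions; prog_fd and the sizes are placeholders, not taken from this diff:

```c
#include <linux/bpf.h>
#include <bpf/libbpf.h>

/* Hypothetical: run an XDP program under test with 8 bytes of metadata,
 * a ~1000-byte linear area, and the rest of the packet in fragments. */
int run_xdp_test(int prog_fd)
{
	static char pkt[1500]; /* dummy packet bytes */
	struct xdp_md ctx_in = {
		.data = 8,        /* first 8 bytes of data_in become metadata */
		.data_end = 1000, /* bytes 8..1000 form the linear area */
	};
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt), /* remainder goes to frags */
		.ctx_in = &ctx_in,
		.ctx_size_in = sizeof(ctx_in),
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}
```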
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
 	return 0;
 }
 
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
-				   enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+					       bool tail, bool release)
 {
-	struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+					  xsk_buff_get_head(xdp);
 
 	if (release) {
-		xsk_buff_del_tail(zc_frag);
-		__xdp_return(0, mem_type, false, zc_frag);
+		xsk_buff_del_frag(zc_frag);
 	} else {
-		zc_frag->data_end -= shrink;
+		if (tail)
+			zc_frag->data_end -= shrink;
+		else
+			zc_frag->data += shrink;
 	}
+
+	return zc_frag;
 }
 
 static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
-				int shrink)
+				int shrink, bool tail)
 {
 	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
 	bool release = skb_frag_size(frag) == shrink;
+	netmem_ref netmem = skb_frag_netmem(frag);
+	struct xdp_buff *zc_frag = NULL;
 
 	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
-		bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
-		goto out;
+		netmem = 0;
+		zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
 	}
 
-	if (release)
-		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+	if (release) {
+		__xdp_return(netmem, mem_type, false, zc_frag);
+	} else {
+		if (!tail)
+			skb_frag_off_add(frag, shrink);
+		skb_frag_size_sub(frag, shrink);
+	}
 
-out:
 	return release;
 }
 
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 
 		len_free += shrink;
 		offset -= shrink;
-		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
 			n_frags_free++;
-		} else {
-			skb_frag_size_sub(frag, shrink);
-			break;
-		}
 	}
 	sinfo->nr_frags -= n_frags_free;
 	sinfo->xdp_frags_size -= len_free;
 
 	if (unlikely(!sinfo->nr_frags)) {
 		xdp_buff_clear_frags_flag(xdp);
		xdp_buff_clear_frag_pfmemalloc(xdp);
 		xdp->data_end -= offset;
 	}
 
@@ -7431,6 +7439,8 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct xdp_sock, FIELD)); \
 	} while (0)
 
+	BTF_TYPE_EMIT(struct bpf_xdp_sock);
+
 	switch (si->off) {
 	case offsetof(struct bpf_xdp_sock, queue_id):
 		BPF_XDP_SOCK_GET(queue_id);
 
@@ -9284,13 +9294,17 @@ static bool sock_addr_is_valid_access(int off, int size,
 			return false;
 		info->reg_type = PTR_TO_SOCKET;
 		break;
-	default:
-		if (type == BPF_READ) {
-			if (size != size_default)
-				return false;
-		} else {
-			return false;
-		}
+	case bpf_ctx_range(struct bpf_sock_addr, user_family):
+	case bpf_ctx_range(struct bpf_sock_addr, family):
+	case bpf_ctx_range(struct bpf_sock_addr, type):
+	case bpf_ctx_range(struct bpf_sock_addr, protocol):
+		if (type != BPF_READ)
+			return false;
+		if (size != size_default)
+			return false;
+		break;
+	default:
+		return false;
 	}
 
 	return true;
 
@@ -11990,6 +12004,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	return func;
 }
 
+/**
+ * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area.
+ * @skb: socket buffer carrying the metadata
+ * @offset: offset into the metadata area, must be <= skb_metadata_len()
+ */
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+	return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
+}
+
 __bpf_kfunc_start_defs();
 __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
 				    struct bpf_dynptr *ptr__uninit)
 
@@ -12007,6 +12031,42 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
 	return 0;
 }
 
+/**
+ * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area.
+ * @skb_: socket buffer carrying the metadata
+ * @flags: future use, must be zero
+ * @ptr__uninit: dynptr to initialize
+ *
+ * Set up a dynptr for access to the metadata area earlier allocated from the
+ * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
+ * &__sk_buff->data_meta.
+ *
+ * If the passed @skb_ is a clone which shares the data with the original, the
+ * dynptr will be read-only. This limitation may be lifted in the future.
+ *
+ * Return:
+ * * %0       - dynptr ready to use
+ * * %-EINVAL - invalid flags, dynptr set to null
+ */
+__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
+					 struct bpf_dynptr *ptr__uninit)
+{
+	struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+	struct sk_buff *skb = (struct sk_buff *)skb_;
+
+	if (flags) {
+		bpf_dynptr_set_null(ptr);
+		return -EINVAL;
+	}
+
+	bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
+
+	if (skb_cloned(skb))
+		bpf_dynptr_set_rdonly(ptr);
+
+	return 0;
+}
+
 __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
 				    struct bpf_dynptr *ptr__uninit)
 {
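For orientation, a minimal BPF-C sketch of the new kfunc in a TC program. The __ksym declaration, the TC_ACT_OK constant, and the assumption that an earlier XDP program reserved at least 4 bytes of metadata with bpf_xdp_adjust_meta() are illustrative, not part of this diff:

```c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

#define TC_ACT_OK 0 /* assumed constant, normally from pkt_cls.h */

/* Kfunc declaration assumed; the kernel-side definition is in this diff. */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int read_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 mark;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return TC_ACT_OK;

	/* Read the first 4 bytes of metadata; bpf_dynptr_read() fails if
	 * the metadata area is smaller than that. */
	if (bpf_dynptr_read(&mark, sizeof(mark), &meta, 0, 0))
		return TC_ACT_OK;

	skb->mark = mark;
	return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";
```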
@@ -12160,6 +12220,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len bytes are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc may change the buffer geometry. Therefore, at load
+ * time, all checks on pointers previously done by the verifier are
+ * invalidated and must be performed again if the kfunc is used in
+ * combination with direct packet access.
+ *
+ * Return:
+ * * %0       - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)x;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+	void *data_hard_end = xdp_data_hard_end(xdp);
+	int data_len = xdp->data_end - xdp->data;
+	void *start;
+
+	if (len <= data_len)
+		return 0;
+
+	if (unlikely(len > xdp_get_buff_len(xdp)))
+		return -EINVAL;
+
+	start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+	headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+	tailroom = data_hard_end - xdp->data_end;
+
+	delta = len - data_len;
+	if (unlikely(delta > tailroom + headroom))
+		return -EINVAL;
+
+	shift = delta - tailroom;
+	if (shift > 0) {
+		memmove(start - shift, start, xdp->data_end - start);
+
+		xdp->data_meta -= shift;
+		xdp->data -= shift;
+		xdp->data_end -= shift;
+	}
+
+	for (i = 0; i < sinfo->nr_frags && delta; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+		u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+		memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+		xdp->data_end += shrink;
+		sinfo->xdp_frags_size -= shrink;
+		delta -= shrink;
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+			n_frags_free++;
+	}
+
+	if (unlikely(n_frags_free)) {
+		memmove(sinfo->frags, sinfo->frags + n_frags_free,
+			(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+		sinfo->nr_frags -= n_frags_free;
+
+		if (!sinfo->nr_frags) {
+			xdp_buff_clear_frags_flag(xdp);
+			xdp_buff_clear_frag_pfmemalloc(xdp);
+		}
+	}
+
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
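A hedged sketch of how a frags-aware XDP program might use the new kfunc; the __ksym declaration and the 128-byte pull length are illustrative assumptions. Note the re-derivation of packet pointers after the call, which the kernel-doc above requires:

```c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Kfunc declaration assumed; the kernel-side definition is in this diff. */
extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

#define PULL_LEN 128 /* arbitrary: enough for the headers we want to parse */

SEC("xdp.frags")
int pull_then_parse(struct xdp_md *ctx)
{
	void *data, *data_end;

	/* Make sure the first PULL_LEN bytes are in the linear area. */
	if (bpf_xdp_pull_data(ctx, PULL_LEN))
		return XDP_DROP;

	/* The pull may have changed the buffer geometry, so packet
	 * pointers must be re-derived and re-checked before use. */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + PULL_LEN > data_end)
		return XDP_DROP;

	/* ... parse headers within [data, data + PULL_LEN) ... */
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";
```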
@@ -12181,8 +12333,13 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
 BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
 
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
+
 BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
 BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
 BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
 
@@ -12202,6 +12359,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
 	.set = &bpf_kfunc_check_set_skb,
 };
 
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = {
+	.owner = THIS_MODULE,
+	.set = &bpf_kfunc_check_set_skb_meta,
+};
+
 static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
 	.owner = THIS_MODULE,
 	.set = &bpf_kfunc_check_set_xdp,
@@ -12237,6 +12399,8 @@ static int __init bpf_kfunc_init(void)
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 					       &bpf_kfunc_set_sock_addr);
 
@@ -16,7 +16,7 @@ SYNOPSIS
 
 	**bpftool** [*OPTIONS*] **gen** *COMMAND*
 
-	*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+	*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
 
 	*COMMAND* := { **object** | **skeleton** | **help** }
 
@@ -186,6 +186,17 @@ OPTIONS
 	          skeleton). A light skeleton contains a loader eBPF program. It does not use
 	          the majority of the libbpf infrastructure, and does not need libelf.
 
+	-S, --sign
+	          For skeletons, generate a signed skeleton. This option must be used with
+	          **-k** and **-i**. Using this flag implicitly enables **--use-loader**.
+
+	-k <private_key.pem>
+	          Path to the private key file in PEM format, required for signing.
+
+	-i <certificate.x509>
+	          Path to the X.509 certificate file in PEM or DER format, required for
+	          signing.
+
 EXAMPLES
 ========
 **$ cat example1.bpf.c**
 
@@ -18,7 +18,7 @@ SYNOPSIS
 
 	*OPTIONS* := { |COMMON_OPTIONS| |
 	{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
-	{ **-L** | **--use-loader** } }
+	{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
 
 	*COMMANDS* :=
 	{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
 
@@ -248,6 +248,18 @@ OPTIONS
 	          creating the maps, and loading the programs (see **bpftool prog tracelog**
 	          as a way to dump those messages).
 
+	-S, --sign
+	          Enable signing of the BPF program before loading. This option must be
+	          used with **-k** and **-i**. Using this flag implicitly enables
+	          **--use-loader**.
+
+	-k <private_key.pem>
+	          Path to the private key file in PEM format, required when signing.
+
+	-i <certificate.x509>
+	          Path to the X.509 certificate file in PEM or DER format, required when
+	          signing.
+
 EXAMPLES
 ========
 **# bpftool prog show**
 
tools/bpf/bpftool/Documentation/bpftool-token.rst (new file, 64 lines)
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+================
+bpftool-token
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF tokens
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+.. include:: substitutions.rst
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **token** *COMMAND*
+
+	*OPTIONS* := { |COMMON_OPTIONS| }
+
+	*COMMANDS* := { **show** | **list** | **help** }
+
+TOKEN COMMANDS
+===============
+
+|	**bpftool** **token** { **show** | **list** }
+|	**bpftool** **token help**
+
+DESCRIPTION
+===========
+bpftool token { show | list }
+	List BPF token information for each *bpffs* mount point containing token
+	information on the system. Information includes mount point path, allowed
+	**bpf**\ () system call commands, maps, programs, and attach types for the
+	token.
+
+bpftool token help
+	Print short help message.
+
+OPTIONS
+========
+.. include:: common_options.rst
+
+EXAMPLES
+========
+
+|	**# mkdir -p /sys/fs/bpf/token**
+|	**# mount -t bpf bpffs /sys/fs/bpf/token** \
+|	**-o delegate_cmds=prog_load:map_create** \
+|	**-o delegate_progs=kprobe** \
+|	**-o delegate_attachs=xdp**
+|	**# bpftool token list**
+
+::
+
+    token_info /sys/fs/bpf/token
+    allowed_cmds:
+	    map_create	prog_load
+    allowed_maps:
+    allowed_progs:
+	    kprobe
+    allowed_attachs:
+	    xdp
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
 endif
 endif
 
-LIBS = $(LIBBPF) -lelf -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcrypto
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
 
 ifeq ($(feature-libelf-zstd),1)
 LIBS += -lzstd
 
@@ -194,7 +194,7 @@ endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
 $(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
 
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
@@ -262,7 +262,7 @@ _bpftool()
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
         local c='--version --json --pretty --bpffs --mapcompat --debug \
-            --use-loader --base-btf'
+            --use-loader --base-btf --sign -i -k'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
 
@@ -283,7 +283,7 @@ _bpftool()
             _sysfs_get_netdevs
             return 0
             ;;
-        file|pinned|-B|--base-btf)
+        file|pinned|-B|--base-btf|-i|-k)
             _filedir
             return 0
             ;;
 
@@ -296,13 +296,21 @@ _bpftool()
     # Remove all options so completions don't have to deal with them.
     local i pprev
     for (( i=1; i < ${#words[@]}; )); do
-        if [[ ${words[i]::1} == - ]] &&
-           [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
-            words=( "${words[@]:0:i}" "${words[@]:i+1}" )
-            [[ $i -le $cword ]] && cword=$(( cword - 1 ))
-        else
-            i=$(( ++i ))
-        fi
+        case ${words[i]} in
+            # Remove option and its argument
+            -B|--base-btf|-i|-k)
+                words=( "${words[@]:0:i}" "${words[@]:i+2}" )
+                [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 ))
+                ;;
+            # No argument, remove option only
+            -*)
+                words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+                [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+                ;;
+            *)
+                i=$(( ++i ))
+                ;;
+        esac
     done
     cur=${words[cword]}
     prev=${words[cword - 1]}
 
@@ -1215,6 +1223,17 @@ _bpftool()
             ;;
         esac
         ;;
+    token)
+        case $command in
+            show|list)
+                return 0
+                ;;
+            *)
+                [[ $prev == $object ]] && \
+                    COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) )
+                ;;
+        esac
+        ;;
     esac
 } &&
 complete -F _bpftool bpftool
 
@@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
 	__u32 info_len = sizeof(info);
 	const char *prog_name = NULL;
 	struct btf *prog_btf = NULL;
-	struct bpf_func_info finfo;
+	struct bpf_func_info finfo = {};
 	__u32 finfo_rec_size;
 	char prog_str[1024];
 	int err;
 
@@ -2,6 +2,10 @@
 // Copyright (C) 2017 Facebook
 // Author: Roman Gushchin <guro@fb.com>
 
+#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 #define _XOPEN_SOURCE 500
 #include <errno.h>
 #include <fcntl.h>
 
@@ -21,6 +21,7 @@
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/vfs.h>
+#include <sys/utsname.h>
 
 #include <linux/filter.h>
 #include <linux/limits.h>
 
@@ -31,6 +32,7 @@
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
 #include <bpf/btf.h>
+#include <zlib.h>
 
 #include "main.h"
 
@@ -1208,3 +1210,94 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
 
 	return 0;
 }
+
+static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
+					   char **value)
+{
+	char *sep;
+
+	while (gzgets(file, buf, n)) {
+		if (strncmp(buf, "CONFIG_", 7))
+			continue;
+
+		sep = strchr(buf, '=');
+		if (!sep)
+			continue;
+
+		/* Trim ending '\n' */
+		buf[strlen(buf) - 1] = '\0';
+
+		/* Split on '=' and ensure that a value is present. */
+		*sep = '\0';
+		if (!sep[1])
+			continue;
+
+		*value = sep + 1;
+		return true;
+	}
+
+	return false;
+}
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+		       size_t num_options, char **out_values,
+		       const char *define_prefix)
+{
+	struct utsname utsn;
+	char path[PATH_MAX];
+	gzFile file = NULL;
+	char buf[4096];
+	char *value;
+	size_t i;
+	int ret = 0;
+
+	if (!requested_options || !out_values || num_options == 0)
+		return -1;
+
+	if (!uname(&utsn)) {
+		snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+
+		/* gzopen also accepts uncompressed files. */
+		file = gzopen(path, "r");
+	}
+
+	if (!file) {
+		/* Some distributions build with CONFIG_IKCONFIG=y and put the
+		 * config file at /proc/config.gz.
+		 */
+		file = gzopen("/proc/config.gz", "r");
+	}
+
+	if (!file) {
+		p_info("skipping kernel config, can't open file: %s",
+		       strerror(errno));
+		return -1;
+	}
+
+	if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) {
+		p_info("skipping kernel config, can't read from file: %s",
+		       strerror(errno));
+		ret = -1;
+		goto end_parse;
+	}
+
+	if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
+		p_info("skipping kernel config, can't find correct file");
+		ret = -1;
+		goto end_parse;
+	}
+
+	while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
+		for (i = 0; i < num_options; i++) {
+			if ((define_prefix && !requested_options[i].macro_dump) ||
+			    out_values[i] || strcmp(buf, requested_options[i].name))
+				continue;
+
+			out_values[i] = strdup(value);
+		}
+	}
+
+end_parse:
+	gzclose(file);
+	return ret;
+}
 
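To make the new helper's contract concrete, a small hypothetical caller; kernel_has_option() is not part of this diff and simply mirrors the feature.c and link.c call sites that follow (within bpftool's sources, where main.h declares the types):

```c
/* Sketch: query a single kernel config option via read_kernel_config().
 * Returns true only if the option is present and not set to "n". */
static bool kernel_has_option(const char *name)
{
	struct kernel_config_option options[] = { { name, } };
	char *values[1] = { NULL };
	bool set;

	if (read_kernel_config(options, 1, values, NULL))
		return false;

	set = values[0] && strcmp(values[0], "n") != 0;
	free(values[0]);
	return set;
}
```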
@@ -10,7 +10,6 @@
 #ifdef USE_LIBCAP
 #include <sys/capability.h>
 #endif
-#include <sys/utsname.h>
 #include <sys/vfs.h>
 
 #include <linux/filter.h>
 
@@ -18,7 +17,6 @@
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
-#include <zlib.h>
 
 #include "main.h"
 
@@ -327,40 +325,9 @@ static void probe_jit_limit(void)
 	}
 }
 
-static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
-					   char **value)
-{
-	char *sep;
-
-	while (gzgets(file, buf, n)) {
-		if (strncmp(buf, "CONFIG_", 7))
-			continue;
-
-		sep = strchr(buf, '=');
-		if (!sep)
-			continue;
-
-		/* Trim ending '\n' */
-		buf[strlen(buf) - 1] = '\0';
-
-		/* Split on '=' and ensure that a value is present. */
-		*sep = '\0';
-		if (!sep[1])
-			continue;
-
-		*value = sep + 1;
-		return true;
-	}
-
-	return false;
-}
-
 static void probe_kernel_image_config(const char *define_prefix)
 {
-	static const struct {
-		const char * const name;
-		bool macro_dump;
-	} options[] = {
+	struct kernel_config_option options[] = {
 		/* Enable BPF */
 		{ "CONFIG_BPF", },
 		/* Enable bpf() syscall */
 
@@ -435,52 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix)
 		{ "CONFIG_HZ", true, }
 	};
 	char *values[ARRAY_SIZE(options)] = { };
-	struct utsname utsn;
-	char path[PATH_MAX];
-	gzFile file = NULL;
-	char buf[4096];
-	char *value;
 	size_t i;
 
-	if (!uname(&utsn)) {
-		snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
-
-		/* gzopen also accepts uncompressed files. */
-		file = gzopen(path, "r");
-	}
-
-	if (!file) {
-		/* Some distributions build with CONFIG_IKCONFIG=y and put the
-		 * config file at /proc/config.gz.
-		 */
-		file = gzopen("/proc/config.gz", "r");
-	}
-	if (!file) {
-		p_info("skipping kernel config, can't open file: %s",
-		       strerror(errno));
-		goto end_parse;
-	}
-	/* Sanity checks */
-	if (!gzgets(file, buf, sizeof(buf)) ||
-	    !gzgets(file, buf, sizeof(buf))) {
-		p_info("skipping kernel config, can't read from file: %s",
-		       strerror(errno));
-		goto end_parse;
-	}
-	if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
-		p_info("skipping kernel config, can't find correct file");
-		goto end_parse;
-	}
-
-	while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
-		for (i = 0; i < ARRAY_SIZE(options); i++) {
-			if ((define_prefix && !options[i].macro_dump) ||
-			    values[i] || strcmp(buf, options[i].name))
-				continue;
-
-			values[i] = strdup(value);
-		}
-	}
+	if (read_kernel_config(options, ARRAY_SIZE(options), values,
+			       define_prefix))
+		return;
 
 	for (i = 0; i < ARRAY_SIZE(options); i++) {
 		if (define_prefix && !options[i].macro_dump)
 
@@ -488,10 +414,6 @@ static void probe_kernel_image_config(const char *define_prefix)
 		print_kernel_option(options[i].name, values[i], define_prefix);
 		free(values[i]);
 	}
-
-end_parse:
-	if (file)
-		gzclose(file);
 }
 
 static bool probe_bpf_syscall(const char *define_prefix)
 
@@ -688,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
 static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
 {
 	DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+	struct bpf_load_and_run_opts sopts = {};
+	char sig_buf[MAX_SIG_SIZE];
+	__u8 prog_sha[SHA256_DIGEST_LENGTH];
 	struct bpf_map *map;
 
 	char ident[256];
 	int err = 0;
 
+	if (sign_progs)
+		opts.gen_hash = true;
+
 	err = bpf_object__gen_loader(obj, &opts);
 	if (err)
 		return err;
 
@@ -701,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
 		p_err("failed to load object file");
 		goto out;
 	}
+
 	/* If there was no error during load then gen_loader_opts
 	 * are populated with the loader program.
 	 */
 
@@ -780,8 +788,52 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
 	print_hex(opts.insns, opts.insns_sz);
 	codegen("\
 		\n\
-		\"; \n\
-		\n\
-	");
+		\";\n");
+
+	if (sign_progs) {
+		sopts.insns = opts.insns;
+		sopts.insns_sz = opts.insns_sz;
+		sopts.excl_prog_hash = prog_sha;
+		sopts.excl_prog_hash_sz = sizeof(prog_sha);
+		sopts.signature = sig_buf;
+		sopts.signature_sz = MAX_SIG_SIZE;
+
+		err = bpftool_prog_sign(&sopts);
+		if (err < 0) {
+			p_err("failed to sign program");
+			goto out;
+		}
+
+		codegen("\
+			\n\
+			static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\
+		");
+		print_hex((const void *)sig_buf, sopts.signature_sz);
+		codegen("\
+			\n\
+			\";\n");
+
+		codegen("\
+			\n\
+			static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\
+		");
+		print_hex((const void *)prog_sha, sizeof(prog_sha));
+		codegen("\
+			\n\
+			\";\n");
+
+		codegen("\
+			\n\
+			opts.signature = (void *)opts_sig;			    \n\
+			opts.signature_sz = sizeof(opts_sig) - 1;		    \n\
+			opts.excl_prog_hash = (void *)opts_excl_hash;		    \n\
+			opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1;	    \n\
+			opts.keyring_id = skel->keyring_id;			    \n\
+		");
+	}
+
 	codegen("\
 		\n\
 		opts.ctx = (struct bpf_loader_ctx *)skel;		    \n\
 		opts.data_sz = sizeof(opts_data) - 1;			    \n\
 		opts.data = (void *)opts_data;				    \n\
 
@@ -1240,7 +1292,7 @@ static int do_skeleton(int argc, char **argv)
 		err = -errno;
 		libbpf_strerror(err, err_buf, sizeof(err_buf));
 		p_err("failed to open BPF object file: %s", err_buf);
-		goto out;
+		goto out_obj;
 	}
 
 	bpf_object__for_each_map(map, obj) {
 
@@ -1355,6 +1407,13 @@ static int do_skeleton(int argc, char **argv)
 		printf("\t} links;\n");
 	}
 
+	if (sign_progs) {
+		codegen("\
+			\n\
+			__s32 keyring_id;				    \n\
+		");
+	}
+
 	if (btf) {
 		err = codegen_datasecs(obj, obj_name);
 		if (err)
 
@@ -1552,6 +1611,7 @@ static int do_skeleton(int argc, char **argv)
 	err = 0;
 out:
 	bpf_object__close(obj);
+out_obj:
 	if (obj_data)
 		munmap(obj_data, mmap_sz);
 	close(fd);
 
@@ -1930,7 +1990,7 @@ static int do_help(int argc, char **argv)
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
-		"                    {-L|--use-loader} }\n"
+		"                    {-L|--use-loader} | [ {-S|--sign} {-k} <private_key.pem> {-i} <certificate.x509> ] }\n"
 		"",
 		bin_name, "gen");
 
@@ -282,11 +282,52 @@ get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
 	return data;
 }
 
+static bool is_x86_ibt_enabled(void)
+{
+#if defined(__x86_64__)
+	struct kernel_config_option options[] = {
+		{ "CONFIG_X86_KERNEL_IBT", },
+	};
+	char *values[ARRAY_SIZE(options)] = { };
+	bool ret;
+
+	if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL))
+		return false;
+
+	ret = !!values[0];
+	free(values[0]);
+	return ret;
+#else
+	return false;
+#endif
+}
+
+static bool
+symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled)
+{
+	if (sym_addr == target_addr)
+		return true;
+
+	/*
+	 * On x86_64 architectures with CET (Control-flow Enforcement Technology),
+	 * function entry points have a 4-byte 'endbr' instruction prefix.
+	 * This causes kprobe hooks to target the address *after* 'endbr'
+	 * (symbol address + 4), preserving the CET instruction.
+	 * Here we check if the symbol address matches the hook target address
+	 * minus 4, indicating a CET-enabled function entry point.
+	 */
+	if (is_ibt_enabled && sym_addr == target_addr - 4)
+		return true;
+
+	return false;
+}
+
 static void
 show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
 {
 	struct addr_cookie *data;
 	__u32 i, j = 0;
+	bool is_ibt_enabled;
 
 	jsonw_bool_field(json_wtr, "retprobe",
 			 info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
 
@@ -306,11 +347,13 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
 	if (!dd.sym_count)
 		goto error;
 
+	is_ibt_enabled = is_x86_ibt_enabled();
 	for (i = 0; i < dd.sym_count; i++) {
-		if (dd.sym_mapping[i].address != data[j].addr)
+		if (!symbol_matches_target(dd.sym_mapping[i].address,
+					   data[j].addr, is_ibt_enabled))
 			continue;
 		jsonw_start_object(json_wtr);
-		jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+		jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr);
 		jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
 		/* Print null if it is vmlinux */
 		if (dd.sym_mapping[i].module[0] == '\0') {
 
@@ -719,6 +762,7 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
 {
 	struct addr_cookie *data;
 	__u32 i, j = 0;
+	bool is_ibt_enabled;
 
 	if (!info->kprobe_multi.count)
 		return;
 
@@ -742,12 +786,14 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
 	if (!dd.sym_count)
 		goto error;
 
+	is_ibt_enabled = is_x86_ibt_enabled();
 	printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
 	for (i = 0; i < dd.sym_count; i++) {
-		if (dd.sym_mapping[i].address != data[j].addr)
+		if (!symbol_matches_target(dd.sym_mapping[i].address,
+					   data[j].addr, is_ibt_enabled))
 			continue;
 		printf("\n\t%016lx %-16llx %s",
-		       dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
+		       (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name);
 		if (dd.sym_mapping[i].module[0] != '\0')
 			printf(" [%s] ", dd.sym_mapping[i].module);
 		else
 
@@ -33,6 +33,9 @@ bool relaxed_maps;
 bool use_loader;
 struct btf *base_btf;
 struct hashmap *refs_table;
+bool sign_progs;
+const char *private_key_path;
+const char *cert_path;
 
 static void __noreturn clean_and_exit(int i)
 {
 
@@ -61,7 +64,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
+		"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
 		"                    {-V|--version} }\n"
 		"",
 
@@ -87,6 +90,7 @@ static const struct cmd commands[] = {
 	{ "gen",	do_gen },
 	{ "struct_ops",	do_struct_ops },
 	{ "iter",	do_iter },
+	{ "token",	do_token },
 	{ "version",	do_version },
 	{ 0 }
 };
 
@@ -447,6 +451,7 @@ int main(int argc, char **argv)
 		{ "nomount",	no_argument,	NULL,	'n' },
 		{ "debug",	no_argument,	NULL,	'd' },
 		{ "use-loader",	no_argument,	NULL,	'L' },
+		{ "sign",	no_argument,	NULL,	'S' },
 		{ "base-btf",	required_argument, NULL, 'B' },
 		{ 0 }
 	};
 
@@ -473,7 +478,7 @@ int main(int argc, char **argv)
 	bin_name = "bpftool";
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
+	while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
 
@@ -519,6 +524,16 @@ int main(int argc, char **argv)
 		case 'L':
 			use_loader = true;
 			break;
+		case 'S':
+			sign_progs = true;
+			use_loader = true;
+			break;
+		case 'k':
+			private_key_path = optarg;
+			break;
+		case 'i':
+			cert_path = optarg;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
 
@@ -533,6 +548,16 @@ int main(int argc, char **argv)
 	if (argc < 0)
 		usage();
 
+	if (sign_progs && (private_key_path == NULL || cert_path == NULL)) {
+		p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing");
+		return -EINVAL;
+	}
+
+	if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) {
+		p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs");
+		return -EINVAL;
+	}
+
 	if (version_requested)
 		ret = do_version(argc, argv);
 	else
 
|||
|
|
@ -6,9 +6,14 @@
|
|||
|
||||
/* BFD and kernel.h both define GCC_VERSION, differently */
|
||||
#undef GCC_VERSION
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <bpf/skel_internal.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kernel.h>
|
||||
|
|
@ -52,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
|
|||
})
|
||||
|
||||
#define ERR_MAX_LEN 1024
|
||||
#define MAX_SIG_SIZE 4096
|
||||
|
||||
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
|
||||
|
||||
|
|
@ -85,6 +91,9 @@ extern bool relaxed_maps;
|
|||
extern bool use_loader;
|
||||
extern struct btf *base_btf;
|
||||
extern struct hashmap *refs_table;
|
||||
extern bool sign_progs;
|
||||
extern const char *private_key_path;
|
||||
extern const char *cert_path;
|
||||
|
||||
void __printf(1, 2) p_err(const char *fmt, ...);
|
||||
void __printf(1, 2) p_info(const char *fmt, ...);
|
||||
|
|
@ -166,6 +175,7 @@ int do_tracelog(int argc, char **arg) __weak;
|
|||
int do_feature(int argc, char **argv) __weak;
|
||||
int do_struct_ops(int argc, char **argv) __weak;
|
||||
int do_iter(int argc, char **argv) __weak;
|
||||
int do_token(int argc, char **argv) __weak;
|
||||
|
||||
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
|
||||
int prog_parse_fd(int *argc, char ***argv);
|
||||
|
|
@ -274,4 +284,15 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
|
|||
/* print netfilter bpf_link info */
|
||||
void netfilter_dump_plain(const struct bpf_link_info *info);
|
||||
void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
|
||||
|
||||
struct kernel_config_option {
|
||||
const char *name;
|
||||
bool macro_dump;
|
||||
};
|
||||
|
||||
int read_kernel_config(const struct kernel_config_option *requested_options,
|
||||
size_t num_options, char **out_values,
|
||||
const char *define_prefix);
|
||||
int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
|
||||
__u32 register_session_key(const char *key_der_path);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@@ -23,6 +23,7 @@
 #include <linux/err.h>
 #include <linux/perf_event.h>
 #include <linux/sizes.h>
+#include <linux/keyctl.h>
 
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
 
@@ -714,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
 
 	if (mode == DUMP_JITED) {
 		if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
-			p_info("no instructions returned");
+			p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?");
 			return -1;
 		}
 		buf = u64_to_ptr(info->jited_prog_insns);
 
@@ -1930,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen)
 {
 	struct bpf_load_and_run_opts opts = {};
 	struct bpf_loader_ctx *ctx;
+	char sig_buf[MAX_SIG_SIZE];
+	__u8 prog_sha[SHA256_DIGEST_LENGTH];
 	int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
 					     sizeof(struct bpf_prog_desc));
 	int log_buf_sz = (1u << 24) - 1;
 
@@ -1953,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen)
 	opts.insns = gen->insns;
 	opts.insns_sz = gen->insns_sz;
 	fds_before = count_open_fds();
+
+	if (sign_progs) {
+		opts.excl_prog_hash = prog_sha;
+		opts.excl_prog_hash_sz = sizeof(prog_sha);
+		opts.signature = sig_buf;
+		opts.signature_sz = MAX_SIG_SIZE;
+		opts.keyring_id = KEY_SPEC_SESSION_KEYRING;
+
+		err = bpftool_prog_sign(&opts);
+		if (err < 0) {
+			p_err("failed to sign program");
+			goto out;
+		}
+
+		err = register_session_key(cert_path);
+		if (err < 0) {
+			p_err("failed to add session key");
+			goto out;
+		}
+	}
 	err = bpf_load_and_run(&opts);
 	fd_delta = count_open_fds() - fds_before;
 	if (err < 0 || verifier_logs) {
 
@@ -1961,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen)
 			fprintf(stderr, "loader prog leaked %d FDs\n",
 				fd_delta);
 	}
+out:
 	free(log_buf);
 	return err;
 }
 
@@ -1988,6 +2012,9 @@ static int do_loader(int argc, char **argv)
 		goto err_close_obj;
 	}
 
+	if (sign_progs)
+		gen.gen_hash = true;
+
 	err = bpf_object__gen_loader(obj, &gen);
 	if (err)
 		goto err_close_obj;
 
@@ -2262,7 +2289,7 @@ static void profile_print_readings(void)
 
 static char *profile_target_name(int tgt_fd)
 {
-	struct bpf_func_info func_info;
+	struct bpf_func_info func_info = {};
 	struct bpf_prog_info info = {};
 	__u32 info_len = sizeof(info);
 	const struct btf_type *t;
 
@@ -2562,7 +2589,7 @@ static int do_help(int argc, char **argv)
 		"       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
 		"                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
-		"                    {-L|--use-loader} }\n"
+		"                    {-L|--use-loader} | [ {-S|--sign} {-k} <private_key.pem> {-i} <certificate.x509> ] }\n"
 		"",
 		bin_name, argv[-2]);
 
tools/bpf/bpftool/sign.c (new file, 211 lines)
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <err.h>
+#include <openssl/opensslv.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <linux/keyctl.h>
+#include <errno.h>
+
+#include <bpf/skel_internal.h>
+
+#include "main.h"
+
+#define OPEN_SSL_ERR_BUF_LEN 256
+
+static void display_openssl_errors(int l)
+{
+	char buf[OPEN_SSL_ERR_BUF_LEN];
+	const char *file;
+	const char *data;
+	unsigned long e;
+	int flags;
+	int line;
+
+	while ((e = ERR_get_error_all(&file, &line, NULL, &data, &flags))) {
+		ERR_error_string_n(e, buf, sizeof(buf));
+		if (data && (flags & ERR_TXT_STRING)) {
+			p_err("OpenSSL %s: %s:%d: %s", buf, file, line, data);
+		} else {
+			p_err("OpenSSL %s: %s:%d", buf, file, line);
+		}
+	}
+}
+
+#define DISPLAY_OSSL_ERR(cond)					 \
+	do {							 \
+		bool __cond = (cond);				 \
+		if (__cond && ERR_peek_error())			 \
+			display_openssl_errors(__LINE__);	 \
+	} while (0)
+
+static EVP_PKEY *read_private_key(const char *pkey_path)
+{
+	EVP_PKEY *private_key = NULL;
+	BIO *b;
+
+	b = BIO_new_file(pkey_path, "rb");
+	private_key = PEM_read_bio_PrivateKey(b, NULL, NULL, NULL);
+	BIO_free(b);
+	DISPLAY_OSSL_ERR(!private_key);
+	return private_key;
+}
+
+static X509 *read_x509(const char *x509_name)
+{
+	unsigned char buf[2];
+	X509 *x509 = NULL;
+	BIO *b;
+	int n;
+
+	b = BIO_new_file(x509_name, "rb");
+	if (!b)
+		goto cleanup;
+
+	/* Look at the first two bytes of the file to determine the encoding */
+	n = BIO_read(b, buf, 2);
+	if (n != 2)
+		goto cleanup;
+
+	if (BIO_reset(b) != 0)
+		goto cleanup;
+
+	if (buf[0] == 0x30 && buf[1] >= 0x81 && buf[1] <= 0x84)
+		/* Assume raw DER encoded X.509 */
+		x509 = d2i_X509_bio(b, NULL);
+	else
+		/* Assume PEM encoded X.509 */
+		x509 = PEM_read_bio_X509(b, NULL, NULL, NULL);
+
+cleanup:
+	BIO_free(b);
+	DISPLAY_OSSL_ERR(!x509);
+	return x509;
+}
+
+__u32 register_session_key(const char *key_der_path)
+{
+	unsigned char *der_buf = NULL;
+	X509 *x509 = NULL;
+	int key_id = -1;
+	int der_len;
+
+	if (!key_der_path)
+		return key_id;
+	x509 = read_x509(key_der_path);
+	if (!x509)
+		goto cleanup;
+	der_len = i2d_X509(x509, &der_buf);
+	if (der_len < 0)
+		goto cleanup;
+	key_id = syscall(__NR_add_key, "asymmetric", key_der_path, der_buf,
+			 (size_t)der_len, KEY_SPEC_SESSION_KEYRING);
+cleanup:
+	X509_free(x509);
+	OPENSSL_free(der_buf);
+	DISPLAY_OSSL_ERR(key_id == -1);
+	return key_id;
+}
+
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+	BIO *bd_in = NULL, *bd_out = NULL;
+	EVP_PKEY *private_key = NULL;
+	CMS_ContentInfo *cms = NULL;
+	long actual_sig_len = 0;
+	X509 *x509 = NULL;
+	int err = 0;
+
+	bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz);
+	if (!bd_in) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	private_key = read_private_key(private_key_path);
+	if (!private_key) {
+		err = -EINVAL;
+		goto cleanup;
+	}
+
+	x509 = read_x509(cert_path);
+	if (!x509) {
+		err = -EINVAL;
+		goto cleanup;
+	}
+
+	cms = CMS_sign(NULL, NULL, NULL, NULL,
+		       CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED |
+		       CMS_STREAM);
+	if (!cms) {
+		err = -EINVAL;
+		goto cleanup;
+	}
+
+	if (!CMS_add1_signer(cms, x509, private_key, EVP_sha256(),
+			     CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP |
+			     CMS_USE_KEYID | CMS_NOATTR)) {
+		err = -EINVAL;
+		goto cleanup;
+	}
+
+	if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1) {
+		err = -EIO;
+		goto cleanup;
+	}
+
+	EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash,
+		   &opts->excl_prog_hash_sz, EVP_sha256(), NULL);
+
+	bd_out = BIO_new(BIO_s_mem());
+	if (!bd_out) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0)) {
+		err = -EIO;
+		goto cleanup;
+	}
+
+	actual_sig_len = BIO_get_mem_data(bd_out, NULL);
+	if (actual_sig_len <= 0) {
+		err = -EIO;
+		goto cleanup;
+	}
+
+	if ((size_t)actual_sig_len > opts->signature_sz) {
+		err = -ENOSPC;
+		goto cleanup;
+	}
+
+	if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len) {
+		err = -EIO;
+		goto cleanup;
+	}
+
+	opts->signature_sz = actual_sig_len;
+cleanup:
+	BIO_free(bd_out);
+	CMS_ContentInfo_free(cms);
+	X509_free(x509);
+	EVP_PKEY_free(private_key);
+	BIO_free(bd_in);
+	DISPLAY_OSSL_ERR(err < 0);
+	return err;
+}
tools/bpf/bpftool/token.c (new file, 210 lines)
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2025 Didi Technology Co., Tao Chen */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define MOUNTS_FILE "/proc/mounts"
+
+static struct {
+	const char *header;
+	const char *key;
+} sets[] = {
+	{"allowed_cmds", "delegate_cmds"},
+	{"allowed_maps", "delegate_maps"},
+	{"allowed_progs", "delegate_progs"},
+	{"allowed_attachs", "delegate_attachs"},
+};
+
+static bool has_delegate_options(const char *mnt_ops)
+{
+	return strstr(mnt_ops, "delegate_cmds") ||
+	       strstr(mnt_ops, "delegate_maps") ||
+	       strstr(mnt_ops, "delegate_progs") ||
+	       strstr(mnt_ops, "delegate_attachs");
+}
+
+static char *get_delegate_value(char *opts, const char *key)
+{
+	char *token, *rest, *ret = NULL;
+
+	if (!opts)
+		return NULL;
+
+	for (token = strtok_r(opts, ",", &rest); token;
+	     token = strtok_r(NULL, ",", &rest)) {
+		if (strncmp(token, key, strlen(key)) == 0 &&
+		    token[strlen(key)] == '=') {
+			ret = token + strlen(key) + 1;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void print_items_per_line(char *input, int items_per_line)
+{
+	char *str, *rest;
+	int cnt = 0;
+
+	if (!input)
+		return;
+
+	for (str = strtok_r(input, ":", &rest); str;
+	     str = strtok_r(NULL, ":", &rest)) {
+		if (cnt % items_per_line == 0)
+			printf("\n\t ");
+
+		printf("%-20s", str);
+		cnt++;
+	}
+}
+
+#define ITEMS_PER_LINE 4
+static void show_token_info_plain(struct mntent *mntent)
+{
+	size_t i;
+
+	printf("token_info %s", mntent->mnt_dir);
+
+	for (i = 0; i < ARRAY_SIZE(sets); i++) {
+		char *opts, *value;
+
+		printf("\n\t%s:", sets[i].header);
+		opts = strdup(mntent->mnt_opts);
+		value = get_delegate_value(opts, sets[i].key);
+		print_items_per_line(value, ITEMS_PER_LINE);
+		free(opts);
+	}
+
+	printf("\n");
+}
+
+static void split_json_array_str(char *input)
+{
+	char *str, *rest;
+
+	if (!input) {
+		jsonw_start_array(json_wtr);
+		jsonw_end_array(json_wtr);
+		return;
+	}
+
+	jsonw_start_array(json_wtr);
+	for (str = strtok_r(input, ":", &rest); str;
+	     str = strtok_r(NULL, ":", &rest)) {
+		jsonw_string(json_wtr, str);
+	}
+	jsonw_end_array(json_wtr);
+}
+
+static void show_token_info_json(struct mntent *mntent)
+{
+	size_t i;
+
+	jsonw_start_object(json_wtr);
+	jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir);
+
+	for (i = 0; i < ARRAY_SIZE(sets); i++) {
+		char *opts, *value;
+
+		jsonw_name(json_wtr, sets[i].header);
+		opts = strdup(mntent->mnt_opts);
+		value = get_delegate_value(opts, sets[i].key);
+		split_json_array_str(value);
+		free(opts);
+	}
+
+	jsonw_end_object(json_wtr);
+}
+
+static int __show_token_info(struct mntent *mntent)
+{
+	if (json_output)
+		show_token_info_json(mntent);
+	else
+		show_token_info_plain(mntent);
+
+	return 0;
+}
+
+static int show_token_info(void)
+{
+	FILE *fp;
+	struct mntent *ent;
+
+	fp = setmntent(MOUNTS_FILE, "r");
+	if (!fp) {
+		p_err("Failed to open: %s", MOUNTS_FILE);
+		return -1;
+	}
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+
+	while ((ent = getmntent(fp)) != NULL) {
+		if (strncmp(ent->mnt_type, "bpf", 3) == 0) {
+			if (has_delegate_options(ent->mnt_opts))
+				__show_token_info(ent);
+		}
+	}
+
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	endmntent(fp);
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	if (argc)
+		return BAD_ARG();
+
+	return show_token_info();
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %1$s %2$s { show | list }\n"
+		"       %1$s %2$s help\n"
+		"       " HELP_SPEC_OPTIONS " }\n"
+		"\n"
+		"",
+		bin_name, argv[-2]);
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_token(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
@@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt)
 static bool get_tracefs_pipe(char *mnt)
 {
 	static const char * const known_mnts[] = {
-		"/sys/kernel/debug/tracing",
 		"/sys/kernel/tracing",
 		"/tracing",
 		"/trace",
+		"/sys/kernel/debug/tracing",
 	};
 	const char *pipe_name = "/trace_pipe";
 	const char *fstype = "tracefs";
 
@@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt)
 		return false;
 
 	p_info("could not find tracefs, attempting to mount it now");
-	/* Most of the time, tracefs is automatically mounted by debugfs at
-	 * /sys/kernel/debug/tracing when we try to access it. If we could not
-	 * find it, it is likely that debugfs is not mounted. Let's give one
-	 * attempt at mounting just tracefs at /sys/kernel/tracing.
-	 */
-	strcpy(mnt, known_mnts[1]);
+	strcpy(mnt, known_mnts[0]);
 	if (mount_tracefs(mnt))
 		return false;
 
@@ -1522,6 +1522,12 @@ union bpf_attr {
 		 * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32	map_token_fd;
+
+		/* Hash of the program that has exclusive access to the map.
+		 */
+		__aligned_u64 excl_prog_hash;
+		/* Size of the passed excl_prog_hash. */
+		__u32 excl_prog_hash_size;
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
 
@@ -1605,6 +1611,16 @@ union bpf_attr {
 		 * continuous.
 		 */
 		__u32		fd_array_cnt;
+		/* Pointer to a buffer containing the signature of the BPF
+		 * program.
+		 */
+		__aligned_u64	signature;
+		/* Size of the signature buffer in bytes. */
+		__u32		signature_size;
+		/* ID of the kernel keyring to be used for signature
+		 * verification.
+		 */
+		__s32		keyring_id;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
 
@@ -6666,6 +6682,8 @@ struct bpf_map_info {
 	__u32 btf_value_type_id;
 	__u32 btf_vmlinux_id;
 	__u64 map_extra;
+	__aligned_u64 hash;
+	__u32 hash_size;
 } __attribute__((aligned(8)));
 
 struct bpf_btf_info {
 
@@ -7418,6 +7436,10 @@ struct bpf_timer {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
 
+struct bpf_task_work {
+	__u64 __opaque;
+} __attribute__((aligned(8)));
+
 struct bpf_wq {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
 
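For reference, a hedged sketch of how a raw loader might populate the new signature attributes on BPF_PROG_LOAD; insns, sig, and their sizes are placeholders, and error handling is omitted:

```c
#include <linux/bpf.h>
#include <linux/keyctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch: load a (loader) program with a detached signature. sig/sig_len
 * would come from a CMS signing step like bpftool's bpftool_prog_sign(). */
static int load_signed(const struct bpf_insn *insns, __u32 insn_cnt,
		       const void *sig, __u32 sig_len)
{
	union bpf_attr attr = {};

	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
	attr.insns = (__u64)(unsigned long)insns;
	attr.insn_cnt = insn_cnt;
	attr.license = (__u64)(unsigned long)"GPL";
	attr.signature = (__u64)(unsigned long)sig;
	attr.signature_size = sig_len;
	attr.keyring_id = KEY_SPEC_SESSION_KEYRING;

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
```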
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
 		   __u32 max_entries,
 		   const struct bpf_map_create_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
 	union bpf_attr attr;
 	int fd;
 
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
 	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
 
 	attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+	attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+	attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
 
 	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, size_t insn_cnt,
 		  struct bpf_prog_load_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+	const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
 	void *finfo = NULL, *linfo = NULL;
 	const char *func_info, *line_info;
 	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
 
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
 	__s32 value_type_btf_obj_fd;
 
 	__u32 token_fd;
+
+	const void *excl_prog_hash;
+	__u32 excl_prog_hash_size;
 	size_t :0;
 };
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
 
 LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      const char *map_name,
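A hedged sketch of the new libbpf options in use; the map shape and the hash source are placeholders (in bpftool's flow the 32-byte SHA-256 comes from bpftool_prog_sign() above):

```c
#include <bpf/bpf.h>

/* Sketch: create an array map that, per this patch, only a program whose
 * SHA-256 matches prog_sha may use. Returns a map fd or a negative error. */
int create_exclusive_map(const unsigned char prog_sha[32])
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.excl_prog_hash = prog_sha,
		.excl_prog_hash_size = 32,
	);

	return bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl_map",
			      sizeof(int), sizeof(long), 1, &opts);
}
```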
@@ -4,6 +4,7 @@
 #define __BPF_GEN_INTERNAL_H
 
 #include "bpf.h"
+#include "libbpf_internal.h"
 
 struct ksym_relo_desc {
 	const char *name;
 
@@ -50,6 +51,7 @@ struct bpf_gen {
 	__u32 nr_ksyms;
 	int fd_array;
 	int nr_fd_array;
+	int hash_insn_offset[SHA256_DWORD_SIZE];
 };
 
 void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
 
@@ -110,6 +110,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
static void emit_signature_match(struct bpf_gen *gen);

void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
{

@@ -152,6 +153,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
	/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
	emit(gen, BPF_EXIT_INSN());
	if (OPTS_GET(gen->opts, gen_hash, false))
		emit_signature_match(gen);
}

static int add_data(struct bpf_gen *gen, const void *data, __u32 size)

@@ -368,6 +371,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
	__emit_sys_close(gen);
}

static void compute_sha_update_offsets(struct bpf_gen *gen);

int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
	int i;

@@ -394,6 +399,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
				  blob_fd_array_off(gen, i));
	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
	emit(gen, BPF_EXIT_INSN());
	if (OPTS_GET(gen->opts, gen_hash, false))
		compute_sha_update_offsets(gen);

	pr_debug("gen: finish %s\n", errstr(gen->error));
	if (!gen->error) {
		struct gen_loader_opts *opts = gen->opts;

@@ -446,6 +454,22 @@ void bpf_gen__free(struct bpf_gen *gen)
	_val;							\
})

static void compute_sha_update_offsets(struct bpf_gen *gen)
{
	__u64 sha[SHA256_DWORD_SIZE];
	__u64 sha_dw;
	int i;

	libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
	for (i = 0; i < SHA256_DWORD_SIZE; i++) {
		struct bpf_insn *insn =
			(struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
		sha_dw = tgt_endian(sha[i]);
		insn[0].imm = (__u32)sha_dw;
		insn[1].imm = sha_dw >> 32;
	}
}

void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
		       __u32 btf_raw_size)
{

@@ -557,6 +581,29 @@ void bpf_gen__map_create(struct bpf_gen *gen,
		emit_sys_close_stack(gen, stack_off(inner_map_fd));
}

static void emit_signature_match(struct bpf_gen *gen)
{
	__s64 off;
	int i;

	for (i = 0; i < SHA256_DWORD_SIZE; i++) {
		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
						 0, 0, 0, 0));
		emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
		gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start;
		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));

		off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
		if (is_simm16(off)) {
			emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL));
			emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
		} else {
			gen->error = -ERANGE;
			emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
		}
	}
}

void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
				   enum bpf_attach_type type)
{

@@ -35,6 +35,7 @@
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/unaligned.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

@@ -496,6 +497,7 @@ struct bpf_program {
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
	__u8 hash[SHA256_DIGEST_LENGTH];
};

struct bpf_struct_ops {

@@ -575,6 +577,7 @@ struct bpf_map {
	bool autocreate;
	bool autoattach;
	__u64 map_extra;
	struct bpf_program *excl_prog;
};

enum extern_type {

@@ -1013,35 +1016,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
	const struct btf_member *kern_data_member;
	struct btf *btf = NULL;
	__s32 kern_vtype_id, kern_type_id;
-	char tname[256];
+	char tname[192], stname[256];
	__u32 i;

	snprintf(tname, sizeof(tname), "%.*s",
		 (int)bpf_core_essential_name_len(tname_raw), tname_raw);

-	kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
-					&btf, mod_btf);
-	if (kern_type_id < 0) {
-		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
-			tname);
-		return kern_type_id;
-	}
-	kern_type = btf__type_by_id(btf, kern_type_id);
+	snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);

-	/* Find the corresponding "map_value" type that will be used
-	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
-	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
-	 * btf_vmlinux.
+	/* Look for the corresponding "map_value" type that will be used
+	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
+	 * and the mod_btf.
+	 * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
	 */
-	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
-						tname, BTF_KIND_STRUCT);
+	kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
	if (kern_vtype_id < 0) {
-		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
-			STRUCT_OPS_VALUE_PREFIX, tname);
+		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

+	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+	if (kern_type_id < 0) {
+		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+		return kern_type_id;
+	}
+	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]

@@ -1054,8 +1055,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
-		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
-			tname, STRUCT_OPS_VALUE_PREFIX, tname);
+		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+			tname, stname);
		return -EINVAL;
	}

@@ -4485,6 +4486,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
	}
}

static int bpf_prog_compute_hash(struct bpf_program *prog)
{
	struct bpf_insn *purged;
	int i, err = 0;

	purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
	if (!purged)
		return -ENOMEM;

	/* If relocations have been done, the map_fd needs to be
	 * discarded for the digest calculation.
	 */
	for (i = 0; i < prog->insns_cnt; i++) {
		purged[i] = prog->insns[i];
		if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
		     purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
			purged[i].imm = 0;
			i++;
			if (i >= prog->insns_cnt ||
			    prog->insns[i].code != 0 ||
			    prog->insns[i].dst_reg != 0 ||
			    prog->insns[i].src_reg != 0 ||
			    prog->insns[i].off != 0) {
				err = -EINVAL;
				goto out;
			}
			purged[i] = prog->insns[i];
			purged[i].imm = 0;
		}
	}
	libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
		      prog->hash);
out:
	free(purged);
	return err;
}

static int bpf_program__record_reloc(struct bpf_program *prog,
				     struct reloc_desc *reloc_desc,
				     __u32 insn_idx, const char *sym_name,

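For context (my illustration, not from the patch): a BPF_LD_IMM64 load occupies two struct bpf_insn slots, with the 64-bit immediate split across the two imm fields, which is why the loop above zeroes imm in both halves and checks that the second slot is otherwise empty:

	/* sketch: two-slot encoding of a 64-bit immediate load */
	struct bpf_insn ld_imm64[2] = {
		{ .code = BPF_LD | BPF_IMM | BPF_DW,
		  .dst_reg = BPF_REG_1,
		  .src_reg = BPF_PSEUDO_MAP_FD,
		  .imm = 42 },		/* low 32 bits, e.g. a map fd */
		{ .imm = 0 },		/* high 32 bits; code/regs/off stay 0 */
	};
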
@@ -5093,6 +5132,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
		return false;
	}

	/*
	 * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
	 * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
	 * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
	 * bpf_get_map_info_by_fd() when checking for compatibility with an
	 * existing DEVMAP.
	 */
	if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
		map_info.map_flags &= ~BPF_F_RDONLY_PROG;

	return (map_info.type == map->def.type &&
		map_info.key_size == map->def.key_size &&
		map_info.value_size == map->def.value_size &&

@@ -5224,6 +5273,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
	create_attr.token_fd = obj->token_fd;
	if (obj->token_fd)
		create_attr.map_flags |= BPF_F_TOKEN_FD;
	if (map->excl_prog) {
		err = bpf_prog_compute_hash(map->excl_prog);
		if (err)
			return err;

		create_attr.excl_prog_hash = map->excl_prog->hash;
		create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
	}

	if (bpf_map__is_struct_ops(map)) {
		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

@@ -10514,6 +10571,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
	return 0;
}

int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
{
	if (map_is_created(map)) {
		pr_warn("exclusive programs must be set before map creation\n");
		return libbpf_err(-EINVAL);
	}

	if (map->obj != prog->obj) {
		pr_warn("excl_prog and map must be from the same bpf object\n");
		return libbpf_err(-EINVAL);
	}

	map->excl_prog = prog;
	return 0;
}

struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
{
	return map->excl_prog;
}

static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{

@@ -14207,3 +14285,100 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
	free(s->progs);
	free(s);
}

static inline __u32 ror32(__u32 v, int bits)
{
	return (v >> bits) | (v << (32 - bits));
}

#define SHA256_BLOCK_LENGTH 64
#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))

static const __u32 sha256_K[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
	0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
	0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
	0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
	0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
	0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};

#define SHA256_ROUND(i, a, b, c, d, e, f, g, h)				\
{									\
	__u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i];	\
	d += tmp;							\
	h = tmp + Sigma_0(a) + Maj(a, b, c);				\
}

static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
{
	while (nblocks--) {
		__u32 a = state[0];
		__u32 b = state[1];
		__u32 c = state[2];
		__u32 d = state[3];
		__u32 e = state[4];
		__u32 f = state[5];
		__u32 g = state[6];
		__u32 h = state[7];
		__u32 w[64];
		int i;

		for (i = 0; i < 16; i++)
			w[i] = get_unaligned_be32(&data[4 * i]);
		for (; i < ARRAY_SIZE(w); i++)
			w[i] = sigma_1(w[i - 2]) + w[i - 7] +
			       sigma_0(w[i - 15]) + w[i - 16];
		for (i = 0; i < ARRAY_SIZE(w); i += 8) {
			SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
			SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
			SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
			SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
			SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
			SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
			SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
			SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
		}
		state[0] += a;
		state[1] += b;
		state[2] += c;
		state[3] += d;
		state[4] += e;
		state[5] += f;
		state[6] += g;
		state[7] += h;
		data += SHA256_BLOCK_LENGTH;
	}
}

void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
{
	__u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
			   0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
	const __be64 bitcount = cpu_to_be64((__u64)len * 8);
	__u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
	size_t final_len = len % SHA256_BLOCK_LENGTH;
	int i;

	sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);

	memcpy(final_data, data + len - final_len, final_len);
	final_data[final_len] = 0x80;
	final_len = round_up(final_len + 9, SHA256_BLOCK_LENGTH);
	memcpy(&final_data[final_len - 8], &bitcount, 8);

	sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);

	for (i = 0; i < ARRAY_SIZE(state); i++)
		put_unaligned_be32(state[i], &out[4 * i]);
}

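A quick usage illustration (mine, not from the patch): libbpf_sha256() computes the digest in one shot and writes 32 big-endian bytes into the caller's buffer:

	__u8 digest[SHA256_DIGEST_LENGTH];
	const char msg[] = "hello";

	/* hash the five message bytes, excluding the NUL terminator */
	libbpf_sha256(msg, sizeof(msg) - 1, digest);
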
@@ -24,8 +24,25 @@
extern "C" {
#endif

/**
 * @brief **libbpf_major_version()** provides the major version of libbpf.
 * @return An integer, the major version number
 */
LIBBPF_API __u32 libbpf_major_version(void);

/**
 * @brief **libbpf_minor_version()** provides the minor version of libbpf.
 * @return An integer, the minor version number
 */
LIBBPF_API __u32 libbpf_minor_version(void);

/**
 * @brief **libbpf_version_string()** provides the version of libbpf in a
 * human-readable form, e.g., "v1.7".
 * @return Pointer to a static string containing the version
 *
 * The format is *not* a part of a stable API and may change in the future.
 */
LIBBPF_API const char *libbpf_version_string(void);

enum libbpf_errno {

@@ -49,6 +66,14 @@ enum libbpf_errno {
	__LIBBPF_ERRNO__END,
};

/**
 * @brief **libbpf_strerror()** converts the provided error code into a
 * human-readable string.
 * @param err The error code to convert
 * @param buf Pointer to a buffer where the error message will be stored
 * @param size The number of bytes in the buffer
 * @return 0, on success; negative error code, otherwise
 */
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);

/**

@@ -252,7 +277,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 * @return 0, on success; negative error code, otherwise, error code is
 * stored in errno
 */
-int bpf_object__prepare(struct bpf_object *obj);
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);

/**
 * @brief **bpf_object__load()** loads BPF object into kernel.

@@ -1266,6 +1291,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 */
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
				     const void *cur_key, void *next_key, size_t key_sz);
/**
 * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
 * specified program. This must be called *before* the map is created.
 *
 * @param map BPF map to make exclusive.
 * @param prog BPF program to be the exclusive user of the map. Must belong
 * to the same bpf_object as the map.
 * @return 0 on success; a negative error code otherwise.
 *
 * This function must be called after the BPF object is opened but before
 * it is loaded. Once the object is loaded, only the specified program
 * will be able to access the map's contents.
 */
LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);

/**
 * @brief **bpf_map__exclusive_program()** returns the exclusive program
 * that is registered with the map (if any).
 * @param map BPF map to which the exclusive program is registered.
 * @return the registered exclusive program.
 */
LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);

struct bpf_xdp_set_link_opts {
	size_t sz;

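As a usage sketch (the object, map, and program names here are hypothetical, not from the patch), the call sits between open and load:

	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "stats_map");
	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "handle_event");

	/* must happen before bpf_object__load() actually creates the map */
	if (bpf_map__set_exclusive_program(map, prog))
		/* handle the error */;

	bpf_object__load(obj);
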
@@ -1810,9 +1857,10 @@ struct gen_loader_opts {
	const char *insns;
	__u32 data_sz;
	__u32 insns_sz;
	bool gen_hash;
};

-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
				      struct gen_loader_opts *opts);

@@ -448,4 +448,7 @@ LIBBPF_1.6.0 {
} LIBBPF_1.5.0;

LIBBPF_1.7.0 {
	global:
		bpf_map__set_exclusive_program;
		bpf_map__exclusive_program;
} LIBBPF_1.6.0;

@@ -736,4 +736,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,

int probe_fd(int fd);

#define SHA256_DIGEST_LENGTH 32
#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)

void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */

@@ -13,10 +13,15 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <linux/keyctl.h>
#include <stdlib.h>
#include "bpf.h"
#endif

#ifndef SHA256_DIGEST_LENGTH
#define SHA256_DIGEST_LENGTH 32
#endif

#ifndef __NR_bpf
# if defined(__mips__) && defined(_ABIO32)
#  define __NR_bpf 4355

@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
	__u32 data_sz;
	__u32 insns_sz;
	const char *errstr;
	void *signature;
	__u32 signature_sz;
	__s32 keyring_id;
	void *excl_prog_hash;
	__u32 excl_prog_hash_sz;
};

long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);

@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
				  const char *map_name,
				  __u32 key_size,
				  __u32 value_size,
-				  __u32 max_entries)
+				  __u32 max_entries,
+				  const void *excl_prog_hash,
+				  __u32 excl_prog_hash_sz)
{
-	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
	union bpf_attr attr;

	memset(&attr, 0, attr_sz);

	attr.map_type = map_type;
	attr.excl_prog_hash = (unsigned long) excl_prog_hash;
	attr.excl_prog_hash_size = excl_prog_hash_sz;

	strncpy(attr.map_name, map_name, sizeof(attr.map_name));
	attr.key_size = key_size;
	attr.value_size = value_size;

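For illustration (not in the patch): a light-skeleton map that doesn't use exclusivity now simply passes NULL/0 for the two extra arguments:

	/* ordinary 1-entry array map, no exclusive-program hash */
	int fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "my_map",
				 4 /* key */, 64 /* value */, 1, NULL, 0);
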
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
	return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
}

static inline int skel_obj_get_info_by_fd(int fd)
{
	const size_t attr_sz = offsetofend(union bpf_attr, info);
	__u8 sha[SHA256_DIGEST_LENGTH];
	struct bpf_map_info info;
	__u32 info_len = sizeof(info);
	union bpf_attr attr;

	memset(&info, 0, sizeof(info));
	info.hash = (long) &sha;
	info.hash_size = SHA256_DIGEST_LENGTH;

	memset(&attr, 0, attr_sz);
	attr.info.bpf_fd = fd;
	attr.info.info = (long) &info;
	attr.info.info_len = info_len;
	return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
}

static inline int skel_map_freeze(int fd)
{
	const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
	union bpf_attr attr;

	memset(&attr, 0, attr_sz);
	attr.map_fd = fd;

	return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
}
#ifdef __KERNEL__
#define set_err
#else

@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,

static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
-	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
	const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
	int map_fd = -1, prog_fd = -1, key = 0, err;
	union bpf_attr attr;

-	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+				       opts->excl_prog_hash, opts->excl_prog_hash_sz);
	if (map_fd < 0) {
		opts->errstr = "failed to create loader map";
		set_err;

@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
		goto out;
	}

#ifndef __KERNEL__
	err = skel_map_freeze(map_fd);
	if (err < 0) {
		opts->errstr = "failed to freeze map";
		set_err;
		goto out;
	}
	err = skel_obj_get_info_by_fd(map_fd);
	if (err < 0) {
		opts->errstr = "failed to fetch obj info";
		set_err;
		goto out;
	}
#endif

	memset(&attr, 0, prog_load_attr_sz);
	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
	attr.insns = (long) opts->insns;
	attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
	attr.license = (long) "Dual BSD/GPL";
#ifndef __KERNEL__
	attr.signature = (long) opts->signature;
	attr.signature_size = opts->signature_sz;
#else
	if (opts->signature || opts->signature_sz)
		pr_warn("signatures are not supported from bpf_preload\n");
#endif
	attr.keyring_id = opts->keyring_id;
	memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
	attr.fd_array = (long) &map_fd;
	attr.log_level = opts->ctx->log_level;

@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
	BPF_USDT_ARG_CONST,
	BPF_USDT_ARG_REG,
	BPF_USDT_ARG_REG_DEREF,
	BPF_USDT_ARG_SIB,
};

/*
 * This struct layout is designed specifically to be backwards/forward
 * compatible between libbpf versions for ARG_CONST, ARG_REG, and
 * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
 */
struct __bpf_usdt_arg_spec {
	/* u64 scalar interpreted depending on arg_type, see below */
	__u64 val_off;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* arg location case, see bpf_usdt_arg() for details */
-	enum __bpf_usdt_arg_type arg_type;
+	enum __bpf_usdt_arg_type arg_type: 8;
	/* index register offset within struct pt_regs */
	__u16 idx_reg_off: 12;
	/* scale factor for index register (1, 2, 4, or 8) */
	__u16 scale_bitshift: 4;
	/* reserved for future use, keeps reg_off offset stable */
	__u8 __reserved: 8;
#else
	__u8 __reserved: 8;
	__u16 idx_reg_off: 12;
	__u16 scale_bitshift: 4;
	enum __bpf_usdt_arg_type arg_type: 8;
#endif
	/* offset of referenced register within struct pt_regs */
	short reg_off;
	/* whether arg should be interpreted as signed value */

@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
	struct __bpf_usdt_spec *spec;
	struct __bpf_usdt_arg_spec *arg_spec;
-	unsigned long val;
+	unsigned long val, idx;
	int err, spec_id;

	*res = 0;

@@ -202,6 +221,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	case BPF_USDT_ARG_SIB:
		/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
		 * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
		 * fetch the base register contents and the index register
		 * contents from pt_regs. Then we calculate the final address
		 * as base + (index * scale) + offset, and do a user-space
		 * probe read to fetch the argument value.
		 */
		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
		if (err)
			return err;
		err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
		if (err)
			return err;
		err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
		if (err)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	default:

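A concrete walk-through (my numbers, not from the patch) of the address math for the spec "-1@-96(%rbp,%rax,8)": the base comes from %rbp, the index from %rax, scale 8 is stored as a shift of 3, and -96 is the displacement held in val_off:

	unsigned long base = 0x7ffd00001000;	/* hypothetical %rbp */
	unsigned long idx = 5;			/* hypothetical %rax */

	/* base + (index << scale_bitshift) + offset */
	unsigned long addr = base + (idx << 3) + (long)-96;	/* 0x7ffd00000fc8 */
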
@@ -200,12 +200,23 @@ enum usdt_arg_type {
	USDT_ARG_CONST,
	USDT_ARG_REG,
	USDT_ARG_REG_DEREF,
	USDT_ARG_SIB,
};

/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
	__u64 val_off;
-	enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	enum usdt_arg_type arg_type: 8;
+	__u16 idx_reg_off: 12;
+	__u16 scale_bitshift: 4;
+	__u8 __reserved: 8; /* keep reg_off offset stable */
+#else
+	__u8 __reserved: 8; /* keep reg_off offset stable */
+	__u16 idx_reg_off: 12;
+	__u16 scale_bitshift: 4;
+	enum usdt_arg_type arg_type: 8;
+#endif
	short reg_off;
	bool arg_signed;
	char arg_bitshift;

@@ -570,9 +581,8 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
	return NULL;
}

-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
-			   const char *data, size_t name_off, size_t desc_off,
-			   struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+			   size_t desc_off, struct usdt_note *usdt_note);

static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);

@@ -626,7 +636,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
		struct elf_seg *seg = NULL;
		void *tmp;

-		err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+		err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
		if (err)
			goto err_out;

@@ -1132,8 +1142,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
/* Parse out USDT ELF note from '.note.stapsdt' section.
 * Logic inspired by perf's code.
 */
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
-			   const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
			   struct usdt_note *note)
{
	const char *provider, *name, *args;

@@ -1283,11 +1292,51 @@ static int calc_pt_regs_off(const char *reg_name)

static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
-	char reg_name[16];
-	int len, reg_off;
-	long off;
+	char reg_name[16] = {0}, idx_reg_name[16] = {0};
+	int len, reg_off, idx_reg_off, scale = 1;
+	long off = 0;

-	if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+	if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+		   arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+	    sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+		   arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+	    sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+		   arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+	    sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+		   arg_sz, reg_name, idx_reg_name, &len) == 3
+	    ) {
		/*
		 * Scale Index Base case:
		 * 1@-96(%rbp,%rax,8)
		 * 1@(%rbp,%rax,8)
		 * 1@-96(%rbp,%rax)
		 * 1@(%rbp,%rax)
		 */
		arg->arg_type = USDT_ARG_SIB;
		arg->val_off = off;

		reg_off = calc_pt_regs_off(reg_name);
		if (reg_off < 0)
			return reg_off;
		arg->reg_off = reg_off;

		idx_reg_off = calc_pt_regs_off(idx_reg_name);
		if (idx_reg_off < 0)
			return idx_reg_off;
		arg->idx_reg_off = idx_reg_off;

		/* validate scale factor and set fields directly */
		switch (scale) {
		case 1: arg->scale_bitshift = 0; break;
		case 2: arg->scale_bitshift = 1; break;
		case 4: arg->scale_bitshift = 2; break;
		case 8: arg->scale_bitshift = 3; break;
		default:
			pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
			return -EINVAL;
		}
	} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
			  arg_sz, &off, reg_name, &len) == 3) {
		/* Memory dereference case, e.g., -4@-20(%rbp) */
		arg->arg_type = USDT_ARG_REG_DEREF;
		arg->val_off = off;

@@ -1306,6 +1355,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
	} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
		/* Register read case, e.g., -4@%eax */
		arg->arg_type = USDT_ARG_REG;
		/* register read has no memory offset */
		arg->val_off = 0;

		reg_off = calc_pt_regs_off(reg_name);

tools/testing/selftests/bpf/.gitignore (vendored)

@@ -44,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
verification_cert.h

@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks

@@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \
	test_bpftool.py

TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
-	bpf_test_modorder_y.ko
+	bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))

# Compile but not part of 'make run_tests'

@@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \
	xdping \
	xskxceiver

-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi

ifneq ($(V),1)
submake_extras := feature_display=0

@@ -398,7 +398,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
		DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif

-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)

@@ -496,15 +496,16 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
		test_subskeleton.skel.h test_subskeleton_lib.skel.h \
		test_usdt.skel.h

-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
-	trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
	core_kern.c core_kern_overflow.c test_ringbuf.c \
	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c

LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c

# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
	kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)

test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o

@@ -535,6 +536,7 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER

LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \

@@ -550,6 +552,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
				 $$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)

# Evaluate rules now with extra TRUNNER_XXX variables above already defined

@@ -604,6 +607,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
	$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)

$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
	$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
	$(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
	$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)

$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%

# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites

@@ -653,6 +665,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
		      $(TRUNNER_EXTRA_HDRS) \
		      $(TRUNNER_BPF_SKELS) \
		      $(TRUNNER_BPF_LSKELS) \
		      $(TRUNNER_BPF_LSKELS_SIGNED) \
		      $(TRUNNER_BPF_SKELS_LINKED) \
		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)

@@ -667,6 +680,7 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
		       %.c \
		       $(TRUNNER_EXTRA_HDRS) \
		       $(VERIFY_SIG_HDR) \
		       $(TRUNNER_TESTS_HDR) \
		       $$(BPFOBJ) | $(TRUNNER_OUTPUT)
	$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)

@@ -697,6 +711,18 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \

endef

VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
VERIFY_SIG_HDR := verification_cert.h
VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem

$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
	$(Q)mkdir -p $(BUILD_DIR)
	$(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)

$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
	$(Q)xxd -i -n test_progs_verification_cert $< > $@

# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs

@@ -716,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
			 disasm.c \
			 disasm_helpers.c \
			 json_writer.c \
			 $(VERIFY_SIG_HDR) \
			 flow_dissector_load.h \
			 ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \

@@ -725,7 +752,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
		       $(OUTPUT)/uprobe_multi \
		       $(TEST_KMOD_TARGETS) \
		       ima_setup.sh \
-		       verify_sig_setup.sh \
+		       $(VERIFY_SIG_SETUP) \
		       $(wildcard progs/btf_dump_test_case_*.c) \
		       $(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE

@@ -816,6 +843,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \

@@ -837,6 +865,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
		 $(OUTPUT)/bench_htab_mem.o \
		 $(OUTPUT)/bench_bpf_crypto.o \
		 $(OUTPUT)/bench_sockmap.o \
		 $(OUTPUT)/bench_lpm_trie_map.o \
		 #
	$(call msg,BINARY,,$@)
	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@

@@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
extern struct argp bench_sockmap_argp;
extern struct argp bench_lpm_trie_map_argp;

static const struct argp_child bench_parsers[] = {
	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },

@@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = {
	{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
	{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
	{ &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
	{ &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
	{},
};

@@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;

-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;

@@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_kprobe_multi_all;
extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;

@@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
extern const struct bench bench_sockmap;
extern const struct bench bench_lpm_trie_noop;
extern const struct bench bench_lpm_trie_baseline;
extern const struct bench bench_lpm_trie_lookup;
extern const struct bench bench_lpm_trie_insert;
extern const struct bench bench_lpm_trie_update;
extern const struct bench bench_lpm_trie_delete;
extern const struct bench bench_lpm_trie_free;

static const struct bench *benchs[] = {
	&bench_count_global,

@@ -578,6 +589,8 @@ static const struct bench *benchs[] = {
	&bench_trig_kprobe_multi,
	&bench_trig_kretprobe_multi,
	&bench_trig_fentry,
	&bench_trig_kprobe_multi_all,
	&bench_trig_kretprobe_multi_all,
	&bench_trig_fexit,
	&bench_trig_fmodret,
	&bench_trig_tp,

@@ -625,6 +638,13 @@ static const struct bench *benchs[] = {
	&bench_crypto_encrypt,
	&bench_crypto_decrypt,
	&bench_sockmap,
	&bench_lpm_trie_noop,
	&bench_lpm_trie_baseline,
	&bench_lpm_trie_lookup,
	&bench_lpm_trie_insert,
	&bench_lpm_trie_update,
	&bench_lpm_trie_delete,
	&bench_lpm_trie_free,
};

static void find_benchmark(void)

@@ -46,6 +46,7 @@ struct bench_res {
	unsigned long gp_ns;
	unsigned long gp_ct;
	unsigned int stime;
	unsigned long duration_ns;
};

struct bench {

tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c (new file, 555 lines)
@@ -0,0 +1,555 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Cloudflare */

/*
 * All of these benchmarks operate on tries with keys in the range
 * [0, args.nr_entries), i.e. there are no gaps or partially filled
 * branches of the trie for any key < args.nr_entries.
 *
 * This gives an idea of worst-case behaviour.
 */

#include <argp.h>
#include <linux/time64.h>
#include <linux/if_ether.h>
#include "lpm_trie_bench.skel.h"
#include "lpm_trie_map.skel.h"
#include "bench.h"
#include "testing_helpers.h"
#include "progs/lpm_trie.h"

static struct ctx {
	struct lpm_trie_bench *bench;
} ctx;

static struct {
	__u32 nr_entries;
	__u32 prefixlen;
	bool random;
} args = {
	.nr_entries = 0,
	.prefixlen = 32,
	.random = false,
};

enum {
	ARG_NR_ENTRIES = 9000,
	ARG_PREFIX_LEN,
	ARG_RANDOM,
};

static const struct argp_option opts[] = {
	{ "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
	  "Number of unique entries in the LPM trie" },
	{ "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
	  "Number of prefix bits to use in the LPM trie" },
	{ "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
	{},
};

static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_NR_ENTRIES:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "Invalid nr_entries count.");
			argp_usage(state);
		}
		args.nr_entries = ret;
		break;
	case ARG_PREFIX_LEN:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "Invalid prefix_len value.");
			argp_usage(state);
		}
		args.prefixlen = ret;
		break;
	case ARG_RANDOM:
		args.random = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

const struct argp bench_lpm_trie_map_argp = {
	.options = opts,
	.parser = lpm_parse_arg,
};

static void validate_common(void)
{
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer\n");
		exit(1);
	}

	if (args.nr_entries == 0) {
		fprintf(stderr, "Missing --nr_entries parameter\n");
		exit(1);
	}

	if ((1UL << args.prefixlen) < args.nr_entries) {
		fprintf(stderr, "prefix_len value too small for nr_entries\n");
		exit(1);
	}
}

static void lpm_insert_validate(void)
{
	validate_common();

	if (env.producer_cnt != 1) {
		fprintf(stderr, "lpm-trie-insert requires a single producer\n");
		exit(1);
	}

	if (args.random) {
		fprintf(stderr, "lpm-trie-insert does not support --random\n");
		exit(1);
	}
}

static void lpm_delete_validate(void)
{
	validate_common();

	if (env.producer_cnt != 1) {
		fprintf(stderr, "lpm-trie-delete requires a single producer\n");
		exit(1);
	}

	if (args.random) {
		fprintf(stderr, "lpm-trie-delete does not support --random\n");
		exit(1);
	}
}

static void lpm_free_validate(void)
{
	validate_common();

	if (env.producer_cnt != 1) {
		fprintf(stderr, "lpm-trie-free requires a single producer\n");
		exit(1);
	}

	if (args.random) {
		fprintf(stderr, "lpm-trie-free does not support --random\n");
		exit(1);
	}
}

static struct trie_key *keys;
static __u32 *vals;

static void fill_map(int map_fd)
{
	int err;

	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = 0,
		.flags = 0,
	);

	err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
	if (err) {
		fprintf(stderr, "failed to batch update keys to map: %d\n",
			-err);
		exit(1);
	}
}

static void empty_map(int map_fd)
{
	int err;

	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
		.elem_flags = 0,
		.flags = 0,
	);

	err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
	if (err) {
		fprintf(stderr, "failed to batch delete keys for map: %d\n",
			-err);
		exit(1);
	}
}

static void attach_prog(void)
{
	int i;

	ctx.bench = lpm_trie_bench__open_and_load();
	if (!ctx.bench) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	ctx.bench->bss->nr_entries = args.nr_entries;
	ctx.bench->bss->prefixlen = args.prefixlen;
	ctx.bench->bss->random = args.random;

	if (lpm_trie_bench__attach(ctx.bench)) {
		fprintf(stderr, "failed to attach skeleton\n");
		exit(1);
	}

	keys = calloc(args.nr_entries, sizeof(*keys));
	vals = calloc(args.nr_entries, sizeof(*vals));

	for (i = 0; i < args.nr_entries; i++) {
		struct trie_key *k = &keys[i];
		__u32 *v = &vals[i];

		k->prefixlen = args.prefixlen;
		k->data = i;
		*v = 1;
	}
}

static void attach_prog_and_fill_map(void)
{
	int fd;

	attach_prog();

	fd = bpf_map__fd(ctx.bench->maps.trie_map);
	fill_map(fd);
}

static void lpm_noop_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_NOOP;
}

static void lpm_baseline_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_BASELINE;
}

static void lpm_lookup_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_LOOKUP;
}

static void lpm_insert_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_INSERT;
}

static void lpm_update_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_UPDATE;
}

static void lpm_delete_setup(void)
{
	attach_prog_and_fill_map();
	ctx.bench->bss->op = LPM_OP_DELETE;
}

static void lpm_free_setup(void)
{
	attach_prog();
	ctx.bench->bss->op = LPM_OP_FREE;
}

static void lpm_measure(struct bench_res *res)
{
	res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
	res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
}

static void bench_reinit_map(void)
{
	int fd = bpf_map__fd(ctx.bench->maps.trie_map);

	switch (ctx.bench->bss->op) {
	case LPM_OP_INSERT:
		/* trie_map needs to be emptied */
		empty_map(fd);
		break;
	case LPM_OP_DELETE:
		/* trie_map needs to be refilled */
		fill_map(fd);
		break;
	default:
		fprintf(stderr, "Unexpected REINIT return code for op %d\n",
			ctx.bench->bss->op);
		exit(1);
	}
}

/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
static void *lpm_producer(void *unused __always_unused)
{
	int err;
	char in[ETH_HLEN]; /* unused */

	LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
		    .data_size_in = sizeof(in), .repeat = 1, );

	while (true) {
		int fd = bpf_program__fd(ctx.bench->progs.run_bench);
		err = bpf_prog_test_run_opts(fd, &opts);
		if (err) {
			fprintf(stderr, "failed to run BPF prog: %d\n", err);
			exit(1);
		}

		/* Check for kernel error code */
		if ((int)opts.retval < 0) {
			fprintf(stderr, "BPF prog returned error: %d\n",
				opts.retval);
			exit(1);
		}

		switch (opts.retval) {
		case LPM_BENCH_SUCCESS:
			break;
		case LPM_BENCH_REINIT_MAP:
			bench_reinit_map();
			break;
		default:
			fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
				opts.retval, ctx.bench->bss->op);
			exit(1);
		}
	}

	return NULL;
}

static void *lpm_free_producer(void *unused __always_unused)
{
	while (true) {
		struct lpm_trie_map *skel;

		skel = lpm_trie_map__open_and_load();
		if (!skel) {
			fprintf(stderr, "failed to open skeleton\n");
			exit(1);
		}

		fill_map(bpf_map__fd(skel->maps.trie_free_map));
		lpm_trie_map__destroy(skel);
	}

	return NULL;
}

/*
 * The standard bench op_report_*() functions assume measurements are
 * taken over a 1-second interval but operations that modify the map
 * (INSERT, DELETE, and FREE) cannot run indefinitely without
 * "resetting" the map to the initial state. Depending on the size of
 * the map, this likely needs to happen before the 1-second timer fires.
 *
 * Calculate the fraction of a second over which the op measurement was
 * taken (to ignore any time spent doing the reset) and report the
 * throughput results per second.
 */
static void frac_second_report_progress(int iter, struct bench_res *res,
					long delta_ns, double rate_divisor,
					char rate)
{
	double hits_per_sec, hits_per_prod;

	hits_per_sec = res->hits / rate_divisor /
		       (res->duration_ns / (double)NSEC_PER_SEC);
	hits_per_prod = hits_per_sec / env.producer_cnt;

	printf("Iter %3d (%7.3lfus): ", iter,
	       (delta_ns - NSEC_PER_SEC) / 1000.0);
	printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
	       hits_per_prod, rate);
}

static void frac_second_report_final(struct bench_res res[], int res_cnt,
				     double lat_divisor, double rate_divisor,
				     char rate, const char *unit)
{
	double hits_mean = 0.0, hits_stddev = 0.0;
	double latency = 0.0;
	int i;

	for (i = 0; i < res_cnt; i++) {
		double val = res[i].hits / rate_divisor /
			     (res[i].duration_ns / (double)NSEC_PER_SEC);
		hits_mean += val / (0.0 + res_cnt);
		latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
	}

	if (res_cnt > 1) {
		for (i = 0; i < res_cnt; i++) {
			double val =
				res[i].hits / rate_divisor /
				(res[i].duration_ns / (double)NSEC_PER_SEC);
			hits_stddev += (hits_mean - val) * (hits_mean - val) /
				       (res_cnt - 1.0);
		}

		hits_stddev = sqrt(hits_stddev);
	}
	printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
	       hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
	       rate);
	printf("latency %8.3lf %s/op\n",
	       latency / lat_divisor / env.producer_cnt, unit);
}

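A worked example of the scaling above (my numbers): if a producer completed 2,000,000 inserts and duration_ns recorded 0.25 s of measured work before the map had to be reset, the reported rate is independent of how long the reset itself took:

	unsigned long hits = 2000000;
	unsigned long duration_ns = 250000000;	/* 0.25 s of measured work */
	double rate_divisor = 1000000.0;	/* report in M ops/s */

	/* 2e6 / 1e6 / 0.25 = 8.0 M ops/s */
	double per_sec = hits / rate_divisor / (duration_ns / (double)NSEC_PER_SEC);
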
static void insert_ops_report_progress(int iter, struct bench_res *res,
|
||||
long delta_ns)
|
||||
{
|
||||
double rate_divisor = 1000000.0;
|
||||
char rate = 'M';
|
||||
|
||||
frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
|
||||
}
|
||||
|
||||
static void delete_ops_report_progress(int iter, struct bench_res *res,
|
||||
long delta_ns)
|
||||
{
|
||||
double rate_divisor = 1000000.0;
|
||||
char rate = 'M';
|
||||
|
||||
frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
|
||||
}
|
||||
|
||||
static void free_ops_report_progress(int iter, struct bench_res *res,
|
||||
long delta_ns)
|
||||
{
|
||||
double rate_divisor = 1000.0;
|
||||
char rate = 'K';
|
||||
|
||||
frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
|
||||
}
|
||||
|
||||
static void insert_ops_report_final(struct bench_res res[], int res_cnt)
|
||||
{
|
||||
double lat_divisor = 1.0;
|
||||
double rate_divisor = 1000000.0;
|
||||
const char *unit = "ns";
|
||||
char rate = 'M';
|
||||
|
||||
frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
|
||||
unit);
|
||||
}
|
||||
|
||||
static void delete_ops_report_final(struct bench_res res[], int res_cnt)
|
||||
{
|
||||
double lat_divisor = 1.0;
|
||||
double rate_divisor = 1000000.0;
|
||||
const char *unit = "ns";
|
||||
char rate = 'M';
|
||||
|
||||
frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
|
||||
unit);
|
||||
}
|
||||
|
||||
static void free_ops_report_final(struct bench_res res[], int res_cnt)
|
||||
{
|
||||
double lat_divisor = 1000000.0;
|
||||
double rate_divisor = 1000.0;
|
||||
const char *unit = "ms";
|
||||
char rate = 'K';
|
||||
|
||||
frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
|
||||
unit);
|
||||
}
|
||||
|
||||

/* noop bench measures harness-overhead */
const struct bench bench_lpm_trie_noop = {
	.name = "lpm-trie-noop",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_noop_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* baseline overhead for lookup and update */
const struct bench bench_lpm_trie_baseline = {
	.name = "lpm-trie-baseline",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_baseline_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of doing a lookup on existing entries in a full trie */
const struct bench bench_lpm_trie_lookup = {
	.name = "lpm-trie-lookup",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_lookup_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of inserting new entries into an empty trie */
const struct bench bench_lpm_trie_insert = {
	.name = "lpm-trie-insert",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_insert_validate,
	.setup = lpm_insert_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = insert_ops_report_progress,
	.report_final = insert_ops_report_final,
};

/* measure cost of updating existing entries in a full trie */
const struct bench bench_lpm_trie_update = {
	.name = "lpm-trie-update",
	.argp = &bench_lpm_trie_map_argp,
	.validate = validate_common,
	.setup = lpm_update_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = ops_report_progress,
	.report_final = ops_report_final,
};

/* measure cost of deleting existing entries from a full trie */
const struct bench bench_lpm_trie_delete = {
	.name = "lpm-trie-delete",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_delete_validate,
	.setup = lpm_delete_setup,
	.producer_thread = lpm_producer,
	.measure = lpm_measure,
	.report_progress = delete_ops_report_progress,
	.report_final = delete_ops_report_final,
};

/* measure cost of freeing a full trie */
const struct bench bench_lpm_trie_free = {
	.name = "lpm-trie-free",
	.argp = &bench_lpm_trie_map_argp,
	.validate = lpm_free_validate,
	.setup = lpm_free_setup,
	.producer_thread = lpm_free_producer,
	.measure = lpm_measure,
	.report_progress = free_ops_report_progress,
	.report_final = free_ops_report_final,
};
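
Each benchmark above is a table of callbacks keyed by .name; the selftests bench binary selects one entry by its command-line name and drives setup, producer, measure, and reporting through these pointers. A pared-down sketch of that dispatch pattern (this struct and table are illustrative only, not the actual bench.h API):

	#include <stdio.h>
	#include <string.h>

	struct mini_bench {
		const char *name;
		void (*setup)(void);
	};

	static void noop_setup(void) { printf("noop setup\n"); }
	static void lookup_setup(void) { printf("lookup setup\n"); }

	static const struct mini_bench benches[] = {
		{ .name = "lpm-trie-noop", .setup = noop_setup },
		{ .name = "lpm-trie-lookup", .setup = lookup_setup },
	};

	int main(int argc, char **argv)
	{
		size_t i;

		/* Pick the benchmark whose .name matches argv[1]. */
		for (i = 0; i < sizeof(benches) / sizeof(benches[0]); i++) {
			if (argc > 1 && !strcmp(benches[i].name, argv[1])) {
				benches[i].setup();
				return 0;
			}
		}
		fprintf(stderr, "usage: %s <bench-name>\n", argv[0]);
		return 1;
	}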

@@ -10,6 +10,7 @@
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
#include "bpf_util.h"

#define FILE_SIZE (128 * 1024)
#define DATA_REPEAT_SIZE 10

@@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void)
 {
 	int i;

-	for (i = 0; i < sizeof(ctx.fds); i++) {
-		if (ctx.fds[0] > 0)
+	for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+		if (ctx.fds[i] > 0)
 			close(ctx.fds[i]);
 	}
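
The hunk above fixes two bugs in one loop: sizeof(ctx.fds) yields the array size in bytes rather than the element count, so the loop over-iterated, and the guard tested ctx.fds[0] instead of ctx.fds[i]. A minimal illustration of the sizeof pitfall; the ARRAY_SIZE macro here mirrors the one bpf_util.h provides:

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	int main(void)
	{
		int fds[8];

		/* sizeof() counts bytes: prints 32 where int is 4 bytes... */
		printf("sizeof(fds)     = %zu\n", sizeof(fds));
		/* ...ARRAY_SIZE() divides by the element size: prints 8. */
		printf("ARRAY_SIZE(fds) = %zu\n", ARRAY_SIZE(fds));
		return 0;
	}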

@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	char **syms = NULL;
	size_t cnt = 0;

	/* Some recursive functions will be skipped in
	 * bpf_get_ksyms -> skip_entry, as they can introduce significant
	 * overhead. However, it's difficult to skip all the recursive
	 * functions for a debug kernel.
	 *
	 * So, don't run the kprobe-multi-all and kretprobe-multi-all on
	 * a debug kernel.
	 */
	if (bpf_get_ksyms(&syms, &cnt, true)) {
		fprintf(stderr, "failed to get ksyms\n");
		exit(1);
	}

	opts.syms = (const char **) syms;
	opts.cnt = cnt;
	opts.retprobe = kretprobe;
	/* attach empty to all the kernel functions except bpf_get_numa_node_id. */
	if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
		fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
		exit(1);
	}
}
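
attach_ksyms_all() creates a single kprobe-multi link spanning every symbol returned by bpf_get_ksyms(). For contrast, the same libbpf API can target a small explicit list; a minimal sketch, with placeholder symbol names and a caller-supplied program:

	#include <bpf/libbpf.h>

	/* Sketch: one kprobe-multi link over a fixed symbol list instead of
	 * all ksyms; "prog" stands in for a loaded skeleton's program.
	 */
	static struct bpf_link *attach_some(struct bpf_program *prog)
	{
		const char *syms[] = { "vfs_read", "vfs_write" };
		LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
			.syms = syms,
			.cnt = 2,
			.retprobe = false,
		);

		/* NULL pattern: symbols come from opts.syms; NULL on failure. */
		return bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
	}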

static void trigger_kprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, false);
	attach_bpf(prog);
}

static void trigger_kretprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kretprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, true);
	attach_bpf(prog);
}

static void trigger_fexit_setup(void)
{
	setup_ctx();

@@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
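
BENCH_TRIG_KERNEL's body lies outside this hunk. As a shape-only illustration (a hypothetical expansion, not the macro actually defined in bench_trigger.c), a registration macro like this typically stamps out a struct bench the way the LPM-trie definitions above are written out by hand:

	/* Hypothetical sketch of what such a macro could expand to; field
	 * and callback names are guesses for illustration.
	 */
	#define BENCH_TRIG_KERNEL(KIND, NAME)				\
		const struct bench bench_trig_##KIND = {		\
			.name = "trig-" NAME,				\
			.setup = trigger_##KIND##_setup,		\
			.producer_thread = trigger_producer,		\
			.measure = trigger_measure,			\
			.report_progress = hits_drops_report_progress,	\
			.report_final = hits_drops_report_final,	\
		}

Note how trigger_##KIND##_setup lines up with the trigger_kprobe_multi_all_setup() and trigger_kretprobe_multi_all_setup() functions added above.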

@@ -6,8 +6,8 @@ def_tests=( \
 	usermode-count kernel-count syscall-count \
 	fentry fexit fmodret \
 	rawtp tp \
-	kprobe kprobe-multi \
-	kretprobe kretprobe-multi \
+	kprobe kprobe-multi kprobe-multi-all \
+	kretprobe kretprobe-multi kretprobe-multi-all \
 )

 tests=("$@")

@@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
				 struct bpf_dynptr *value_p) __weak __ksym;

#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	4
#define NMI_BITS	4

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x))-1)

#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK	(__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
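
Expanded for reference, these shifts and masks reproduce the preempt-count layout from the kernel's include/linux/preempt.h:

	/*
	 * preemption depth: bits  0-7   (mask 0x000000ff, not needed above)
	 * SOFTIRQ_MASK:     bits  8-15  = 0x0000ff00
	 * HARDIRQ_MASK:     bits 16-19  = 0x000f0000
	 * NMI_MASK:         bits 20-23  = 0x00f00000
	 */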

extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
extern const int __preempt_count __ksym;
#endif

struct task_struct___preempt_rt {
	int softirq_disable_cnt;
} __attribute__((preserve_access_index));

static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
	return *(int *) bpf_this_cpu_ptr(&__preempt_count);
#elif defined(bpf_target_arm64)
	return bpf_get_current_task_btf()->thread_info.preempt.count;
#endif
	return 0;
}

/* Description
 * Report whether it is in interrupt context. Only works on the following archs:
 *  * x86
 *  * arm64
 */
static inline int bpf_in_interrupt(void)
{
	struct task_struct___preempt_rt *tsk;
	int pcnt;

	pcnt = get_preempt_count();
	if (!CONFIG_PREEMPT_RT)
		return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);

	tsk = (void *) bpf_get_current_task_btf();
	return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
	       (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
}
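
A minimal sketch of a caller, assuming bpf_experimental.h (above) and vmlinux.h are on the include path; the section target, probed symbol, and counter names are made up for the example:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include "bpf_experimental.h"

	__u64 irq_hits, task_hits;

	SEC("kprobe/kfree")
	int count_ctx(void *ctx)
	{
		/* Non-zero when running in NMI, hardirq, or (on non-RT)
		 * serving-softirq context.
		 */
		if (bpf_in_interrupt())
			__sync_fetch_and_add(&irq_hits, 1);
		else
			__sync_fetch_and_add(&task_hits, 1);
		return 0;
	}

	char _license[] SEC("license") = "GPL";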

#endif

@@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
			       struct bpf_dynptr *ptr__uninit) __ksym __weak;

extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym __weak;

/* Description
 *  Obtain a read-only pointer to the dynptr's data
 * Returns
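
The added declaration is the skb-metadata counterpart of bpf_dynptr_from_skb. A minimal sketch of reading through it, assuming a TC program and that an earlier XDP program stashed a 4-byte value at metadata offset 0 (both assumptions of the example):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
					    struct bpf_dynptr *ptr__uninit) __ksym __weak;

	SEC("tc")
	int read_meta(struct __sk_buff *skb)
	{
		struct bpf_dynptr meta;
		__u32 val = 0;

		if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
			return 0; /* no metadata area available */
		/* Copy the first 4 bytes of the metadata into val. */
		if (bpf_dynptr_read(&val, sizeof(val), &meta, 0, 0) == 0)
			bpf_printk("skb meta: %u", val);
		return 0;
	}

	char _license[] SEC("license") = "GPL";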