From 0f85b6c1cb5746180b2bf674a436a48e6722e7ed Mon Sep 17 00:00:00 2001 From: Rhys Lloyd Date: Fri, 18 Jul 2025 00:36:24 -0700 Subject: [PATCH 01/64] gpu: nova-core: vbios: use size_of instead of magic number 12 is identical to the value of `size_of::()`, so use the latter instead. [acourbot@nvidia.com: remove `dead_code` expect to fix `unfulfilled_lint_expectations` lint] Reviewed-by: Alexandre Courbot Signed-off-by: Rhys Lloyd Link: https://lore.kernel.org/r/20250718073633.194032-2-krakow20@gmail.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/vbios.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 5b5d9f38cbb3..b4bca1919d8f 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -345,7 +345,7 @@ fn image_size_bytes(&self) -> usize { /// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the /// [`FwSecBiosImage`]. #[derive(Debug, Clone, Copy)] -#[expect(dead_code)] +#[repr(C)] struct BitHeader { /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) id: u16, @@ -365,7 +365,7 @@ struct BitHeader { impl BitHeader { fn new(data: &[u8]) -> Result { - if data.len() < 12 { + if data.len() < core::mem::size_of::() { return Err(EINVAL); } From 2872c8982c80e22f9a7fc4db7d8f0e8188ab4cd7 Mon Sep 17 00:00:00 2001 From: Rhys Lloyd Date: Fri, 18 Jul 2025 00:36:25 -0700 Subject: [PATCH 02/64] gpu: nova-core: vbios: change PmuLookupTableEntry to use size_of Annotate the PmuLookupTableEntry with an `#[repr(C, packed)]` attribute. Removes another magic number by making the struct the same size as the data it needs to read, allowing the use of `size_of::()` [acourbot@nvidia.com: remove `dead_code` expect to fix `unfulfilled_lint_expectations` lint] Reviewed-by: Alexandre Courbot Signed-off-by: Rhys Lloyd Link: https://lore.kernel.org/r/20250718073633.194032-3-krakow20@gmail.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/vbios.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index b4bca1919d8f..50fbea69fc00 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -892,7 +892,7 @@ fn try_from(base: BiosImageBase) -> Result { /// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. /// /// See the [`PmuLookupTable`] description for more information. -#[expect(dead_code)] +#[repr(C, packed)] struct PmuLookupTableEntry { application_id: u8, target_id: u8, @@ -901,7 +901,7 @@ struct PmuLookupTableEntry { impl PmuLookupTableEntry { fn new(data: &[u8]) -> Result { - if data.len() < 6 { + if data.len() < core::mem::size_of::() { return Err(EINVAL); } From 3b51739fa2001b047359e11afd755ae5e189b7db Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 7 Aug 2025 09:07:57 +0200 Subject: [PATCH 03/64] MAINTAINERS: Add website of Nova GPU driver The Nova GPU driver has a sub-website on the Rust-for-Linux website which so far was missing from the respective section in MAINTAINERS. Add the Nova website. Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250807070756.103816-2-phasta@kernel.org Signed-off-by: Alexandre Courbot --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..936650c7eff9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7829,6 +7829,7 @@ M: Danilo Krummrich M: Alexandre Courbot L: nouveau@lists.freedesktop.org S: Supported +W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau @@ -7840,6 +7841,7 @@ DRM DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich L: nouveau@lists.freedesktop.org S: Supported +W: https://rust-for-linux.com/nova-gpu-driver Q: https://patchwork.freedesktop.org/project/nouveau/ B: https://gitlab.freedesktop.org/drm/nova/-/issues C: irc://irc.oftc.net/nouveau From 46ab4278e9bbb16e3dc8cec52fcdcadc95936a89 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Wed, 16 Jul 2025 14:39:41 +0530 Subject: [PATCH 04/64] drm: nova: update ARef import from sync::aref Update nova to import `ARef` from `sync::aref` instead of `types`. This aligns with the ongoing effort to move `ARef` and `AlwaysRefCounted` to sync. Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1173 Signed-off-by: Shankari Anand Link: https://lore.kernel.org/r/20250716090941.811418-1-shankari.ak0208@gmail.com [ Alter subject and commit message to be nova specific. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nova/driver.rs | 4 +++- drivers/gpu/drm/nova/gem.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs index b28b2e05cc15..91b7380f83ab 100644 --- a/drivers/gpu/drm/nova/driver.rs +++ b/drivers/gpu/drm/nova/driver.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, types::ARef}; +use kernel::{ + auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, sync::aref::ARef, +}; use crate::file::File; use crate::gem::NovaObject; diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index 33b62d21400c..cd82773dab92 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -4,7 +4,7 @@ drm, drm::{gem, gem::BaseObject}, prelude::*, - types::ARef, + sync::aref::ARef, }; use crate::{ From 5e04ff729f93fff16ec760e9cd7b72499963663f Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 18 Jul 2025 16:26:06 +0900 Subject: [PATCH 05/64] gpu: nova-core: register: minor grammar and spelling fixes There is only one top-level macro in this file at the moment, but the "macros.rs" file name allows for more. Change the wording so that it will remain valid even if additional macros are added to the file. Fix a couple of spelling errors and grammatical errors, and break up a run-on sentence, for clarity. Cc: Alexandre Courbot Cc: Danilo Krummrich Signed-off-by: John Hubbard Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-1-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index a3e6de1779d4..6d4080a65ee4 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -1,17 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -//! Macro to define register layout and accessors. +//! `register!` macro to define register layout and accessors. //! //! A single register typically includes several fields, which are accessed through a combination //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because //! not all possible field values are necessarily valid. //! -//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated -//! type for each register with its own field accessors that can return an error is a field's value -//! is invalid. +//! The `register!` macro in this module provides an intuitive and readable syntax for defining a +//! dedicated type for each register. Each such type comes with its own field accessors that can +//! return an error if a field's value is invalid. -/// Defines a dedicated type for a register with an absolute offset, alongside with getter and -/// setter methods for its fields and methods to read and write it from an `Io` region. +/// Defines a dedicated type for a register with an absolute offset, including getter and setter +/// methods for its fields and methods to read and write it from an `Io` region. /// /// Example: /// @@ -24,7 +24,7 @@ /// ``` /// /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less +/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least /// significant bits of the register. Each field can be accessed and modified using accessor /// methods: /// From be3536a4bdda53ff5a91b7e542b167d12bddb317 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:07 +0900 Subject: [PATCH 06/64] gpu: nova-core: register: add missing space in register!() A space was missing between arguments in this invocation. [acourbot@nvidia.com: use more descriptive commit title] Reviewed-by: Daniel Almeida Reviewed-by: Boqun Feng Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-2-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 6d4080a65ee4..9cc612f41f12 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -116,7 +116,7 @@ macro_rules! register { ) => { register!(@common $name @ $offset $(, $comment)?); register!(@field_accessors $name { $($fields)* }); - register!(@io$name @ + $offset); + register!(@io $name @ + $offset); }; // Creates a alias register of relative offset register `alias` with its own fields. From c5aeb264b6b27c52fc6c9ef3b50eaaebff5d9b60 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:08 +0900 Subject: [PATCH 07/64] gpu: nova-core: register: allow fields named `offset` `offset` is a common field name, yet using it triggers a build error due to the conflict between the uppercased field constant (which becomes `OFFSET` in this case) containing the bitrange of the field, and the `OFFSET` constant constaining the offset of the register. Fix this by adding `_RANGE` the field's range constant to avoid the name collision. [acourbot@nvidia.com: fix merge conflict due to switch from `as u32` to `u32::from`.] Reported-by: Timur Tabi Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-3-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs.rs | 5 +++-- drivers/gpu/nova-core/regs/macros.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index d49fddf6a3c6..c8f8adb24f6e 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -28,7 +28,7 @@ impl NV_PMC_BOOT_0 { /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. pub(crate) fn architecture(self) -> Result { Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()), + self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), ) } @@ -36,7 +36,8 @@ pub(crate) fn architecture(self) -> Result { pub(crate) fn chipset(self) -> Result { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation()) + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 9cc612f41f12..023748685412 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -278,7 +278,7 @@ impl $name { { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( - const [<$field:upper>]: ::core::ops::RangeInclusive = $lo..=$hi; + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); From cb2607f3b27087120c15c13f57f1a950251445c9 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:09 +0900 Subject: [PATCH 08/64] gpu: nova-core: register: improve documentation for basic registers Reword parts of the documentation that were a bit heavy to read, and harmonize/fix the examples. The relative registers section is about to be redesigned and its documentation rewritten, so do not touch this part. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-4-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 023748685412..31df1827de77 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -33,25 +33,25 @@ /// let boot0 = BOOT_0::read(&bar); /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); /// -/// // `Chipset::try_from` will be called with the value of the field and returns an error if the -/// // value is invalid. +/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an +/// // error if it is invalid. /// let chipset = boot0.chipset()?; /// /// // Update some fields and write the value back. /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// -/// // Or just read and update the register in a single step: +/// // Or, just read and update the register in a single step: /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields can be defined as follows: +/// Fields are defined as follows: /// -/// - `as ` simply returns the field value casted as the requested integer type, typically -/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as ` simply returns the field value casted to , typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. /// - `as => ` calls ``'s `From::<>` implementation and returns /// the result. /// - `as ?=> ` calls ``'s `TryFrom::<>` implementation -/// and returns the result. This is useful on fields for which not all values are value. +/// and returns the result. This is useful with fields for which not all values are valid. /// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. @@ -76,15 +76,17 @@ /// for cases where a register's interpretation depends on the context: /// /// ```no_run -/// register!(SCRATCH_0 @ 0x0000100, "Scratch register 0" { +/// register!(SCRATCH @ 0x00000200, "Scratch register" { /// 31:0 value as u32, "Raw value"; +/// }); /// -/// register!(SCRATCH_0_BOOT_STATUS => SCRATCH_0, "Boot status of the firmware" { +/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { /// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// }); /// ``` /// -/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH_0`, while also -/// providing its own `completed` method. +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also +/// providing its own `completed` field. macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ( From 4f7f8f847d4a9c2460ecd43a702e51ac31919e2f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:10 +0900 Subject: [PATCH 09/64] gpu: nova-core: register: simplify @leaf_accessor rule The `$type` metavariable is not used in the @leaf_accessor rule, so remove it. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-5-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 31df1827de77..44e2a12dce52 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -230,7 +230,7 @@ impl $name { $(, $comment:literal)?; ) => { register!( - @leaf_accessor $name $hi:$lo $field as bool + @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } $into_type => $into_type $(, $comment)?; ); @@ -248,7 +248,7 @@ impl $name { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => ::core::result::Result< $try_into_type, @@ -262,7 +262,7 @@ impl $name { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; @@ -276,7 +276,7 @@ impl $name { // Generates the accessor methods for a single field. ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( From b567daf4ea4e4565118436aca51d12adf8ab3e12 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:11 +0900 Subject: [PATCH 10/64] gpu: nova-core: register: remove `try_` accessors for relative registers Relative registers are always accessed using a literal base, meaning their validity can always be checked at compile-time. Thus remove the `try_` accessors that have no purpose. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-6-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 38 +--------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 44e2a12dce52..0567b3aae4d2 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -57,9 +57,7 @@ /// definition, or the field getter and setter methods they are attached to. /// /// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access, -/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown -/// at compile-time: +/// `write` methods take a `base` argument that is added to the specified address before access: /// /// ```no_run /// register!(CPU_CTL @ +0x0000010, "CPU core control" { @@ -386,40 +384,6 @@ pub(crate) fn alter( let reg = f(Self::read(io, base)); reg.write(io, base); } - - #[inline] - pub(crate) fn try_read( - io: &T, - base: usize, - ) -> ::kernel::error::Result where - T: ::core::ops::Deref>, - { - io.try_read32(base + $offset).map(Self) - } - - #[inline] - pub(crate) fn try_write( - self, - io: &T, - base: usize, - ) -> ::kernel::error::Result<()> where - T: ::core::ops::Deref>, - { - io.try_write32(self.0, base + $offset) - } - - #[inline] - pub(crate) fn try_alter( - io: &T, - base: usize, - f: F, - ) -> ::kernel::error::Result<()> where - T: ::core::ops::Deref>, - F: ::core::ops::FnOnce(Self) -> Self, - { - let reg = f(Self::try_read(io, base)?); - reg.try_write(io, base) - } } }; } From 036c5fa25020d8b1619a96dabbcf9b156f4329bb Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:12 +0900 Subject: [PATCH 11/64] gpu: nova-core: register: move OFFSET declaration to I/O impl block The OFFSET const is an I/O property, and having to pass it to the @common rule makes it impossible to make I/O optional, as we want to get to eventually. Thus, move OFFSET to the I/O impl block so it is not needed by the @common rule anymore. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-7-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 0567b3aae4d2..3d4476cc2421 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -92,7 +92,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); + register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ $offset); }; @@ -103,7 +103,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); + register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ $alias::OFFSET); }; @@ -114,7 +114,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); + register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ + $offset); }; @@ -125,7 +125,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); + register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ + $alias::OFFSET); }; @@ -134,7 +134,7 @@ macro_rules! register { // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, // and conversion to regular `u32`). - (@common $name:ident @ $offset:expr $(, $comment:literal)?) => { + (@common $name:ident $(, $comment:literal)?) => { $( #[doc=$comment] )? @@ -142,11 +142,6 @@ macro_rules! register { #[derive(Clone, Copy, Default)] pub(crate) struct $name(u32); - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - } - // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires // matching the fields, which will complexify the syntax considerably... impl ::core::fmt::Debug for $name { @@ -319,6 +314,8 @@ pub(crate) fn [](mut self, value: $to_type) -> Self { (@io $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { + pub(crate) const OFFSET: usize = $offset; + #[inline] pub(crate) fn read(io: &T) -> Self where T: ::core::ops::Deref>, @@ -351,6 +348,8 @@ pub(crate) fn alter( (@io $name:ident @ + $offset:literal) => { #[allow(dead_code)] impl $name { + pub(crate) const OFFSET: usize = $offset; + #[inline] pub(crate) fn read( io: &T, From e40d2b26167200b99fa4fb1df15d4c870489c82e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:13 +0900 Subject: [PATCH 12/64] gpu: nova-core: register: fix documentation and indentation Fix a few documentation inconsistencies, and harmonize indentation where possible. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-8-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 34 ++++++++-------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 3d4476cc2421..5b3a320de1d3 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -87,44 +87,28 @@ /// providing its own `completed` field. macro_rules! register { // Creates a register at a fixed offset of the MMIO space. - ( - $name:ident @ $offset:literal $(, $comment:literal)? { - $($fields:tt)* - } - ) => { + ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ $offset); }; - // Creates a alias register of fixed offset register `alias` with its own fields. - ( - $name:ident => $alias:ident $(, $comment:literal)? { - $($fields:tt)* - } - ) => { + // Creates an alias register of fixed offset register `alias` with its own fields. + ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address. - ( - $name:ident @ + $offset:literal $(, $comment:literal)? { - $($fields:tt)* - } - ) => { + ($name:ident @ + $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ + $offset); }; - // Creates a alias register of relative offset register `alias` with its own fields. - ( - $name:ident => + $alias:ident $(, $comment:literal)? { - $($fields:tt)* - } - ) => { + // Creates an alias register of relative offset register `alias` with its own fields. + ($name:ident => + $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@common $name $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ + $alias::OFFSET); @@ -259,7 +243,7 @@ impl $name { { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; - // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax. + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; @@ -310,7 +294,7 @@ pub(crate) fn [](mut self, value: $to_type) -> Self { ); }; - // Creates the IO accessors for a fixed offset register. + // Generates the IO accessors for a fixed offset register. (@io $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { @@ -344,7 +328,7 @@ pub(crate) fn alter( } }; - // Create the IO accessors for a relative offset register. + // Generates the IO accessors for a relative offset register. (@io $name:ident @ + $offset:literal) => { #[allow(dead_code)] impl $name { From 9b2379f02423801d372bc588d38b62a1fa6cd05f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:14 +0900 Subject: [PATCH 13/64] gpu: nova-core: register: add missing doccomments for fixed registers I/O accessors Add the missing doccomments for these accessors, as having a bit of inline documentation is always helpful. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-9-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 5b3a320de1d3..d5ce7cb67f21 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -300,6 +300,7 @@ pub(crate) fn [](mut self, value: $to_type) -> Self { impl $name { pub(crate) const OFFSET: usize = $offset; + /// Read the register from its address in `io`. #[inline] pub(crate) fn read(io: &T) -> Self where T: ::core::ops::Deref>, @@ -307,6 +308,7 @@ pub(crate) fn read(io: &T) -> Self where Self(io.read32($offset)) } + /// Write the value contained in `self` to the register address in `io`. #[inline] pub(crate) fn write(self, io: &T) where T: ::core::ops::Deref>, @@ -314,6 +316,8 @@ pub(crate) fn write(self, io: &T) where io.write32(self.0, $offset) } + /// Read the register from its address in `io` and run `f` on its value to obtain a new + /// value to write back. #[inline] pub(crate) fn alter( io: &T, From 7a9cb3dfb07d3e29c0eeebb26b11a34a19a889a4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:15 +0900 Subject: [PATCH 14/64] gpu: nova-core: register: add fields dispatcher internal rule Fields are complex and cumbersome to match in a rule, and were only captured in order to generate the field accessors. However, there are other places (like the `Debug` and `Default` implementations) where we would benefit from having access to at least some of the field information, but refrained from doing so because it would have meant matching the whole fields in a rule more complex than we need. Introduce a new `@fields_dispatcher` internal rule that captures all the field information and passes it to `@field_accessors`. It does not provide any functional change in itself, but allows us to reuse the captured field information partially to provide better `Debug` and `Default` implementations in following patches. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-10-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 42 +++++++++++++++++++++------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index d5ce7cb67f21..ff2deca6b1f3 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -88,37 +88,33 @@ macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); + register!(@core $name $(, $comment)? { $($fields)* } ); register!(@io $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); + register!(@core $name $(, $comment)? { $($fields)* } ); register!(@io $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address. ($name:ident @ + $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); + register!(@core $name $(, $comment)? { $($fields)* } ); register!(@io $name @ + $offset); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => + $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); + register!(@core $name $(, $comment)? { $($fields)* } ); register!(@io $name @ + $alias::OFFSET); }; // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to regular `u32`). - (@common $name:ident $(, $comment:literal)?) => { + // and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? @@ -149,6 +145,32 @@ fn from(reg: $name) -> u32 { reg.0 } } + + register!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + register!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); }; // Defines all the field getter/methods methods for `$name`. From 6ecd6b73e0845e2a1ba0d4da273c7cc7c21c88db Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:16 +0900 Subject: [PATCH 15/64] gpu: nova-core: register: improve `Debug` implementation Now that we have an internal rule to dispatch field information where needed, use it to generate a better `Debug` implementation where the raw hexadecimal value of the register is displayed, as well as the `Debug` values of its individual fields. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-11-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index ff2deca6b1f3..c966e58b775d 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -122,16 +122,6 @@ macro_rules! register { #[derive(Clone, Copy, Default)] pub(crate) struct $name(u32); - // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires - // matching the fields, which will complexify the syntax considerably... - impl ::core::fmt::Debug for $name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - f.debug_tuple(stringify!($name)) - .field(&format_args!("0x{0:x}", &self.0)) - .finish() - } - } - impl ::core::ops::BitOr for $name { type Output = Self; @@ -171,6 +161,7 @@ fn from(reg: $name) -> u32 { ; )* }); + register!(@debug $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -316,6 +307,20 @@ pub(crate) fn [](mut self, value: $to_type) -> Self { ); }; + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::core::fmt::Debug for $name { + fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + f.debug_struct(stringify!($name)) + .field("", &format_args!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + // Generates the IO accessors for a fixed offset register. (@io $name:ident @ $offset:expr) => { #[allow(dead_code)] From 3fa145bef533f899ed6de641fe99c70028e28481 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:17 +0900 Subject: [PATCH 16/64] gpu: nova-core: register: generate correct `Default` implementation The `Default` implementation of a register should be the aggregate of the default values of all its fields, and not simply be zeroed. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-12-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index c966e58b775d..e43e1ab7ae2e 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -112,14 +112,14 @@ macro_rules! register { // All rules below are helpers. - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to the value type) and field accessor methods. + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] - #[derive(Clone, Copy, Default)] + #[derive(Clone, Copy)] pub(crate) struct $name(u32); impl ::core::ops::BitOr for $name { @@ -162,6 +162,7 @@ fn from(reg: $name) -> u32 { )* }); register!(@debug $name { $($field;)* }); + register!(@default $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -321,6 +322,25 @@ fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { } }; + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the register where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[](Default::default()); + )* + ); + + value + } + } + }; + // Generates the IO accessors for a fixed offset register. (@io $name:ident @ $offset:expr) => { #[allow(dead_code)] From fcdce54d645a0779892faaed1209105350e15d92 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:18 +0900 Subject: [PATCH 17/64] gpu: nova-core: register: split @io rule into fixed and relative versions We used the same @io rule with different patterns to define both the fixed and relative I/O accessors. This can be confusing as the matching rules are very similar. Since all call sites know which version they want to call, split @io into @io_fixed and @io_relative to remove any ambiguity. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-13-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index e43e1ab7ae2e..432af02b3378 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -89,25 +89,25 @@ macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io $name @ $offset); + register!(@io_fixed $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io $name @ $alias::OFFSET); + register!(@io_fixed $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address. ($name:ident @ + $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io $name @ + $offset); + register!(@io_relative $name @ + $offset); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => + $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io $name @ + $alias::OFFSET); + register!(@io_relative $name @ + $alias::OFFSET); }; // All rules below are helpers. @@ -342,7 +342,7 @@ fn default() -> Self { }; // Generates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:expr) => { + (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { pub(crate) const OFFSET: usize = $offset; @@ -380,7 +380,7 @@ pub(crate) fn alter( }; // Generates the IO accessors for a relative offset register. - (@io $name:ident @ + $offset:literal) => { + (@io_relative $name:ident @ + $offset:literal) => { #[allow(dead_code)] impl $name { pub(crate) const OFFSET: usize = $offset; From c6bc4225279d9a6eff8aafc94347183b2babc52a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:19 +0900 Subject: [PATCH 18/64] gpu: nova-core: register: use #[inline(always)] for all methods These methods should always be inlined, so use the strongest compiler hint that exists to maximize the chance they will indeed be. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-14-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/regs/macros.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 432af02b3378..200fbe847571 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -280,7 +280,7 @@ impl $name { #[doc="Returns the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn $field(self) -> $res_type { ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; @@ -296,7 +296,7 @@ pub(crate) fn $field(self) -> $res_type { #[doc="Sets the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn [](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; @@ -348,7 +348,7 @@ impl $name { pub(crate) const OFFSET: usize = $offset; /// Read the register from its address in `io`. - #[inline] + #[inline(always)] pub(crate) fn read(io: &T) -> Self where T: ::core::ops::Deref>, { @@ -356,7 +356,7 @@ pub(crate) fn read(io: &T) -> Self where } /// Write the value contained in `self` to the register address in `io`. - #[inline] + #[inline(always)] pub(crate) fn write(self, io: &T) where T: ::core::ops::Deref>, { @@ -365,7 +365,7 @@ pub(crate) fn write(self, io: &T) where /// Read the register from its address in `io` and run `f` on its value to obtain a new /// value to write back. - #[inline] + #[inline(always)] pub(crate) fn alter( io: &T, f: F, @@ -385,7 +385,7 @@ pub(crate) fn alter( impl $name { pub(crate) const OFFSET: usize = $offset; - #[inline] + #[inline(always)] pub(crate) fn read( io: &T, base: usize, @@ -395,7 +395,7 @@ pub(crate) fn read( Self(io.read32(base + $offset)) } - #[inline] + #[inline(always)] pub(crate) fn write( self, io: &T, @@ -406,7 +406,7 @@ pub(crate) fn write( io.write32(self.0, base + $offset) } - #[inline] + #[inline(always)] pub(crate) fn alter( io: &T, base: usize, From af10924fc471d1c693b8689249f53ea10b0519b7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:20 +0900 Subject: [PATCH 19/64] gpu: nova-core: register: redesign relative registers The relative registers are currently very unsafe to use: callers can specify any constant as the base address for access, meaning they can effectively interpret any I/O address as any relative register. Ideally, valid base addresses for a family of registers should be explicitly defined in the code, and could only be used with the relevant registers This patch changes the relative register declaration from e.g.: register!(CPU_CTL @ +0x0000010, "CPU core control" { 0:0 start as bool, "Start the CPU core"; }); into: register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { 0:0 start as bool, "Start the CPU core"; }); Where `CpuCtlBase` is the name of a ZST used as a parameter of the `RegisterBase<>` trait to define a trait unique to a class of register. This specialized trait is then implemented for every type that provides a valid base address, enabling said types to be passed as the base address provider for the register's I/O accessor methods. This design thus makes it impossible to pass an unexpected base address to a relative register, and, since the valid bases are all known at compile-time, also guarantees that all I/O accesses are done within the valid bounds of the I/O range. [acourbot@nvidia.com: add example in the commit log.] Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-15-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- Documentation/gpu/nova/core/todo.rst | 1 - drivers/gpu/nova-core/falcon.rs | 67 +++++----- drivers/gpu/nova-core/falcon/gsp.rs | 12 +- drivers/gpu/nova-core/falcon/hal/ga102.rs | 14 +- drivers/gpu/nova-core/falcon/sec2.rs | 9 +- drivers/gpu/nova-core/regs.rs | 50 +++---- drivers/gpu/nova-core/regs/macros.rs | 156 ++++++++++++++++++---- 7 files changed, 212 insertions(+), 97 deletions(-) diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index 894a1e9c3741..a1d12c1b289d 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -131,7 +131,6 @@ crate so it can be used by other components as well. Features desired before this happens: -* Relative register with build-time base address validation, * Arrays of registers with build-time index validation, * Make I/O optional I/O (for field values that are not registers), * Support other sizes than `u32`, diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 50437c67c14a..67265a0b5d7b 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -14,6 +14,7 @@ use crate::driver::Bar0; use crate::gpu::Chipset; use crate::regs; +use crate::regs::macros::RegisterBase; use crate::util; pub(crate) mod gsp; @@ -274,10 +275,16 @@ fn from(value: bool) -> Self { } } -/// Trait defining the parameters of a given Falcon instance. -pub(crate) trait FalconEngine: Sync { - /// Base I/O address for the falcon, relative from which its registers are accessed. - const BASE: usize; +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used +/// to identify a given Falcon instance with register I/O methods. +pub(crate) trait FalconEngine: Sync + RegisterBase + Sized { + /// Singleton of the engine, used to identify it with register I/O methods. + const ID: Self; } /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). @@ -343,13 +350,13 @@ pub(crate) fn new( bar: &Bar0, need_riscv: bool, ) -> Result { - let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, E::BASE); + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); // Check that the revision and security model contain valid values. let _ = hwcfg1.core_rev()?; let _ = hwcfg1.security_model()?; if need_riscv { - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if !hwcfg2.riscv() { dev_err!( dev, @@ -369,7 +376,7 @@ pub(crate) fn new( fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. util::wait_on(Delta::from_millis(20), || { - if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { Some(()) } else { None @@ -379,12 +386,12 @@ fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { /// Reset the falcon engine. fn reset_eng(&self, bar: &Bar0) -> Result { - let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. let _ = util::wait_on(Delta::from_micros(150), || { - let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if r.reset_ready() { Some(()) } else { @@ -392,13 +399,13 @@ fn reset_eng(&self, bar: &Bar0) -> Result { } }); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. let _: Result = util::wait_on(Delta::from_micros(10), || None); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); self.reset_wait_mem_scrubbing(bar)?; @@ -413,7 +420,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { regs::NV_PFALCON_FALCON_RM::default() .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -464,10 +471,10 @@ fn dma_wr>( regs::NV_PFALCON_FALCON_DMATRFBASE::default() .set_base((dma_start >> 8) as u32) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFBASE1::default() .set_base((dma_start >> 40) as u16) - .write(bar, E::BASE); + .write(bar, &E::ID); let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() .set_size(DmaTrfCmdSize::Size256B) @@ -478,17 +485,17 @@ fn dma_wr>( // Perform a transfer of size `DMA_LEN`. regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() .set_offs(load_offsets.dst_start + pos) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() .set_offs(src_start + pos) - .write(bar, E::BASE); - cmd.write(bar, E::BASE); + .write(bar, &E::ID); + cmd.write(bar, &E::ID); // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); if r.idle() { Some(()) } else { @@ -502,9 +509,9 @@ fn dma_wr>( /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::alter(bar, E::BASE, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, E::BASE); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, E::BASE, |v| { + regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); @@ -517,7 +524,7 @@ pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) // Set `BootVec` to start of non-secure code. regs::NV_PFALCON_FALCON_BOOTVEC::default() .set_value(fw.boot_addr()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -538,27 +545,27 @@ pub(crate) fn boot( if let Some(mbox0) = mbox0 { regs::NV_PFALCON_FALCON_MAILBOX0::default() .set_value(mbox0) - .write(bar, E::BASE); + .write(bar, &E::ID); } if let Some(mbox1) = mbox1 { regs::NV_PFALCON_FALCON_MAILBOX1::default() .set_value(mbox1) - .write(bar, E::BASE); + .write(bar, &E::ID); } - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE).alias_en() { + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), false => regs::NV_PFALCON_FALCON_CPUCTL::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), } // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); if r.halted() { Some(()) } else { @@ -567,8 +574,8 @@ pub(crate) fn boot( })?; let (mbox0, mbox1) = ( - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, E::BASE).value(), - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, E::BASE).value(), + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), ); Ok((mbox0, mbox1)) diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index d622e9a64470..0db9f94036a6 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -2,23 +2,27 @@ use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine}, - regs, + falcon::{Falcon, FalconEngine, PFalconBase}, + regs::{self, macros::RegisterBase}, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. pub(crate) struct Gsp(()); -impl FalconEngine for Gsp { +impl RegisterBase for Gsp { const BASE: usize = 0x00110000; } +impl FalconEngine for Gsp { + const ID: Self = Gsp(()); +} + impl Falcon { /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to /// allow GSP to signal CPU for processing new messages in message queue. pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { regs::NV_PFALCON_FALCON_IRQSCLR::default() .set_swgen0(true) - .write(bar, Gsp::BASE); + .write(bar, &Gsp::ID); } } diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 52c33d3f22a8..3fdacd19322d 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -16,15 +16,15 @@ use super::FalconHal; fn select_core_ga102(bar: &Bar0) -> Result { - let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { regs::NV_PRISCV_RISCV_BCR_CTRL::default() .set_core_select(PeregrineCoreSelect::Falcon) - .write(bar, E::BASE); + .write(bar, &E::ID); // TIMEOUT: falcon core should take less than 10ms to report being enabled. util::wait_on(Delta::from_millis(10), || { - let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if r.valid() { Some(()) } else { @@ -76,16 +76,16 @@ fn signature_reg_fuse_version_ga102( fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> Result { regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() .set_value(params.pkc_data_offset) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() .set_value(u32::from(params.engine_id_mask)) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() .set_ucode_id(params.ucode_id) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_MOD_SEL::default() .set_algo(FalconModSelAlgo::Rsa3k) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index 5147d9e2a7fe..dbc486a712ff 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,10 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::FalconEngine; +use crate::falcon::{FalconEngine, PFalconBase}; +use crate::regs::macros::RegisterBase; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. pub(crate) struct Sec2(()); -impl FalconEngine for Sec2 { +impl RegisterBase for Sec2 { const BASE: usize = 0x00840000; } + +impl FalconEngine for Sec2 { + const ID: Self = Sec2(()); +} diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index c8f8adb24f6e..7730c5a475d5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -5,11 +5,11 @@ #![allow(non_camel_case_types)] #[macro_use] -mod macros; +pub(crate) mod macros; use crate::falcon::{ DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PeregrineCoreSelect, + FalconModSelAlgo, FalconSecurityModel, PFalconBase, PeregrineCoreSelect, }; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; @@ -195,24 +195,24 @@ pub(crate) fn vga_workspace_addr(self) -> Option { // PFALCON -register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 { +register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { 4:4 halt as bool; 6:6 swgen0 as bool; }); -register!(NV_PFALCON_FALCON_MAILBOX0 @ +0x00000040 { +register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_MAILBOX1 @ +0x00000044 { +register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_RM @ +0x00000084 { +register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_HWCFG2 @ +0x000000f4 { +register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { 10:10 riscv as bool; 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; @@ -225,17 +225,17 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { } } -register!(NV_PFALCON_FALCON_CPUCTL @ +0x00000100 { +register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { 1:1 startcpu as bool; 4:4 halted as bool; 6:6 alias_en as bool; }); -register!(NV_PFALCON_FALCON_BOOTVEC @ +0x00000104 { +register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { +register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { 0:0 require_ctx as bool; 1:1 dmem_scrubbing as bool; 2:2 imem_scrubbing as bool; @@ -243,15 +243,15 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 7:7 secure_stat as bool; }); -register!(NV_PFALCON_FALCON_DMATRFBASE @ +0x00000110 { +register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { 31:0 base as u32; }); -register!(NV_PFALCON_FALCON_DMATRFMOFFS @ +0x00000114 { +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { 23:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { +register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { 0:0 full as bool; 1:1 idle as bool; 3:2 sec as u8; @@ -262,60 +262,60 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 16:16 set_dmtag as u8; }); -register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ +0x0000011c { +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { 31:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFBASE1 @ +0x00000128 { +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { 8:0 base as u16; }); -register!(NV_PFALCON_FALCON_HWCFG1 @ +0x0000012c { +register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; }); -register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ +0x00000130 { +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { 1:1 startcpu as bool; }); // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. -register!(NV_PFALCON_FALCON_ENGINE @ +0x000003c0 { +register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { 0:0 reset as bool; }); // TODO[REGA]: this is an array of registers. -register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 { +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600] { 1:0 target as u8 ?=> FalconFbifTarget; 2:2 mem_type as bool => FalconFbifMemType; }); -register!(NV_PFALCON_FBIF_CTL @ +0x00000624 { +register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { 7:7 allow_phys_no_ctx as bool; }); -register!(NV_PFALCON2_FALCON_MOD_SEL @ +0x00001180 { +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalconBase[0x00001180] { 7:0 algo as u8 ?=> FalconModSelAlgo; }); -register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ +0x00001198 { +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalconBase[0x00001198] { 7:0 ucode_id as u8; }); -register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ +0x0000119c { +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalconBase[0x0000119c] { 31:0 value as u32; }); // TODO[REGA]: this is an array of registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 { +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalconBase[0x00001210] { 31:0 value as u32; }); // PRISCV -register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 { +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { 0:0 valid as bool; 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 200fbe847571..005ee7afe279 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -10,6 +10,16 @@ //! dedicated type for each register. Each such type comes with its own field accessors that can //! return an error if a field's value is invalid. +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub(crate) trait RegisterBase { + const BASE: usize; +} + /// Defines a dedicated type for a register with an absolute offset, including getter and setter /// methods for its fields and methods to read and write it from an `Io` region. /// @@ -56,20 +66,6 @@ /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// -/// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access: -/// -/// ```no_run -/// register!(CPU_CTL @ +0x0000010, "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; -/// }); -/// -/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`. -/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE); -/// pr_info!("CPU CTL: {:#x}", cpuctl); -/// cpuctl.set_start(true).write(&bar, CPU_BASE); -/// ``` -/// /// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful /// for cases where a register's interpretation depends on the context: /// @@ -85,6 +81,87 @@ /// /// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also /// providing its own `completed` field. +/// +/// ## Relative registers +/// +/// A register can be defined as being accessible from a fixed offset of a provided base. For +/// instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... | +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition +/// +/// This can be done using the `Base[Offset]` syntax when specifying the register's address. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase`. Here is the above example translated +/// into code: +/// +/// ```no_run +/// // Type used to identify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase`. +/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { +/// 0:0 start as bool, "Start the CPU core"; +/// }); +/// +/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // that is used to resolve its final address by adding its `BASE` to the offset of the +/// // register. +/// +/// // Start `CPU0`. +/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// +/// // Start `CPU1`. +/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// +/// // Aliases can also be defined for relative register. +/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { +/// 1:1 alias_start as bool, "Start the aliased CPU core"; +/// }); +/// +/// // Start the aliased `CPU0`. +/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { @@ -98,16 +175,16 @@ macro_rules! register { register!(@io_fixed $name @ $alias::OFFSET); }; - // Creates a register at a relative offset from a base address. - ($name:ident @ + $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { + // Creates a register at a relative offset from a base address provider. + ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io_relative $name @ + $offset); + register!(@io_relative $name @ $base [ $offset ]); }; // Creates an alias register of relative offset register `alias` with its own fields. - ($name:ident => + $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { + ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { register!(@core $name $(, $comment)? { $($fields)* } ); - register!(@io_relative $name @ + $alias::OFFSET); + register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; // All rules below are helpers. @@ -380,39 +457,62 @@ pub(crate) fn alter( }; // Generates the IO accessors for a relative offset register. - (@io_relative $name:ident @ + $offset:literal) => { + (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { #[allow(dead_code)] impl $name { pub(crate) const OFFSET: usize = $offset; + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it. #[inline(always)] - pub(crate) fn read( + pub(crate) fn read( io: &T, - base: usize, + #[allow(unused_variables)] + base: &B, ) -> Self where T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, { - Self(io.read32(base + $offset)) + const OFFSET: usize = $name::OFFSET; + + let value = io.read32( + >::BASE + OFFSET + ); + + Self(value) } + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the register's offset to it. #[inline(always)] - pub(crate) fn write( + pub(crate) fn write( self, io: &T, - base: usize, + #[allow(unused_variables)] + base: &B, ) where T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, { - io.write32(self.0, base + $offset) + const OFFSET: usize = $name::OFFSET; + + io.write32( + self.0, + >::BASE + OFFSET + ); } + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it, then run `f` on its value to obtain a new value to + /// write back. #[inline(always)] - pub(crate) fn alter( + pub(crate) fn alter( io: &T, - base: usize, + base: &B, f: F, ) where T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, F: ::core::ops::FnOnce(Self) -> Self, { let reg = f(Self::read(io, base)); From e617f3a3702ca1ca2a498edbad0b12ceadc27321 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:21 +0900 Subject: [PATCH 20/64] gpu: nova-core: falcon: add distinct base address for PFALCON2 Falcon engines have two distinct register bases: `PFALCON` and `PFALCON2`. So far we assumed that `PFALCON2` was located at `PFALCON + 0x1000` because that is the case of most engines, but there are exceptions (NVDEC uses `0x1c00`). Fix this shortcoming by leveraging the redesigned relative registers definitions to assign a distinct `PFalcon2Base` base address to each falcon engine. Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-16-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/falcon.rs | 7 ++++++- drivers/gpu/nova-core/falcon/gsp.rs | 6 +++++- drivers/gpu/nova-core/falcon/sec2.rs | 6 +++++- drivers/gpu/nova-core/regs.rs | 12 +++++++----- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 67265a0b5d7b..2ecdcc6b9b9d 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -278,11 +278,16 @@ fn from(value: bool) -> Self { /// Type used to represent the `PFALCON` registers address base for a given falcon engine. pub(crate) struct PFalconBase(()); +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + /// Trait defining the parameters of a given Falcon engine. /// /// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used /// to identify a given Falcon instance with register I/O methods. -pub(crate) trait FalconEngine: Sync + RegisterBase + Sized { +pub(crate) trait FalconEngine: + Sync + RegisterBase + RegisterBase + Sized +{ /// Singleton of the engine, used to identify it with register I/O methods. const ID: Self; } diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index 0db9f94036a6..f17599cb49fa 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -2,7 +2,7 @@ use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine, PFalconBase}, + falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, regs::{self, macros::RegisterBase}, }; @@ -13,6 +13,10 @@ impl RegisterBase for Gsp { const BASE: usize = 0x00110000; } +impl RegisterBase for Gsp { + const BASE: usize = 0x00111000; +} + impl FalconEngine for Gsp { const ID: Self = Gsp(()); } diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index dbc486a712ff..815786c8480d 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::{FalconEngine, PFalconBase}; +use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; use crate::regs::macros::RegisterBase; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. @@ -10,6 +10,10 @@ impl RegisterBase for Sec2 { const BASE: usize = 0x00840000; } +impl RegisterBase for Sec2 { + const BASE: usize = 0x00841000; +} + impl FalconEngine for Sec2 { const ID: Self = Sec2(()); } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 7730c5a475d5..e55525916bad 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -9,7 +9,7 @@ use crate::falcon::{ DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PFalconBase, PeregrineCoreSelect, + FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, }; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; @@ -296,20 +296,22 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 7:7 allow_phys_no_ctx as bool; }); -register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalconBase[0x00001180] { +/* PFALCON2 */ + +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { 7:0 algo as u8 ?=> FalconModSelAlgo; }); -register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalconBase[0x00001198] { +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { 7:0 ucode_id as u8; }); -register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalconBase[0x0000119c] { +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { 31:0 value as u32; }); // TODO[REGA]: this is an array of registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalconBase[0x00001210] { +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210] { 31:0 value as u32; }); From 20ed4a8695b277c296e3a30306ef8551903e1e04 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:22 +0900 Subject: [PATCH 21/64] gpu: nova-core: register: add support for register arrays Having registers that can be interpreted identically in a contiguous I/O area (or at least, following a given stride) is a common way to organize registers, and is used by NVIDIA hardware. Thus, add a way to simply and safely declare such a layout using the register!() macro. Build-time bound-checking is effective for array accesses performed with a constant. For cases where the index cannot be known at compile time, `try_` variants of the accessors are also made available that return `EINVAL` if the access is out-of-bounds. Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-17-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gpu.rs | 2 +- drivers/gpu/nova-core/regs.rs | 15 ++- drivers/gpu/nova-core/regs/macros.rs | 195 +++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index b5c9786619a9..8caecaf7dfb4 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -221,7 +221,7 @@ fn run_fwsec_frts( fwsec_frts.run(dev, falcon, bar)?; // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E::read(bar).frts_err_code(); + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); if frts_status != 0 { dev_err!( dev, diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index e55525916bad..c45de208055d 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -45,8 +45,10 @@ pub(crate) fn chipset(self) -> Result { // PBUS -// TODO[REGA]: this is an array of registers. -register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { +register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); + +register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], + "scratch register 0xe used as FRTS firmware error code" { 31:16 frts_err_code as u16; }); @@ -124,13 +126,12 @@ pub(crate) fn higher_bound(self) -> u64 { 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; }); -// TODO[REGA]: This is an array of registers. -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 { - 31:0 value as u32; -}); +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); register!( - NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05, + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], "Scratch group 05 register 0 used as GFW boot progress indicator" { 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; } diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 005ee7afe279..f38177065c25 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -162,6 +162,57 @@ pub(crate) trait RegisterBase { /// // Start the aliased `CPU0`. /// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); /// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas +/// can be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add +/// an `idx` parameter to their `read`, `write` and `alter` methods: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = SCRATCH::read(bar, 0).value(); +/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// let scratch_15 = SCRATCH::read(bar, 15).value(); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = SCRATCH::read(bar, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); +/// +/// // Alias to a particular register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let status = FIRMWARE_STATUS::read(bar).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { @@ -187,6 +238,35 @@ macro_rules! register { register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; + // Creates an array of registers at a fixed offset of the MMIO space. + ( + $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + static_assert!(::core::mem::size_of::() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_array $name @ $offset [ $size ; $stride ]); + }; + + // Shortcut for contiguous array of registers (stride == size of element). + ( + $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + register!($name @ $offset [ $size ; ::core::mem::size_of::() ] $(, $comment)? { + $($fields)* + } ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); + }; + // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, @@ -520,4 +600,119 @@ pub(crate) fn alter( } } }; + + // Generates the IO accessors for an array of registers. + (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from its address in `io`. + #[inline(always)] + pub(crate) fn read( + io: &T, + idx: usize, + ) -> Self where + T: ::core::ops::Deref>, + { + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to the array register with index `idx` in `io`. + #[inline(always)] + pub(crate) fn write( + self, + io: &T, + idx: usize + ) where + T: ::core::ops::Deref>, + { + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + #[inline(always)] + pub(crate) fn alter( + io: &T, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, idx)); + reg.write(io, idx); + } + + /// Read the array register at index `idx` from its address in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read( + io: &T, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + { + if idx < Self::SIZE { + Ok(Self::read(io, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to the array register with index `idx` in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write( + self, + io: &T, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + { + if idx < Self::SIZE { + Ok(self.write(io, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_alter( + io: &T, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; } From ec2f6c81d2a16a93d882488062e8e09e2f1d6865 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:23 +0900 Subject: [PATCH 22/64] gpu: nova-core: falcon: use register arrays for FUSE registers FUSE registers are an array of 16 consecutive registers. Use the newly available register array feature to define them properly and improve the code using them. Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-18-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/falcon/hal/ga102.rs | 33 +++++++++++------------ drivers/gpu/nova-core/regs.rs | 8 +++--- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 3fdacd19322d..13c945fd6d6b 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -42,35 +42,32 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result { - // TODO[REGA]: The ucode fuse versions are contained in the - // FUSE_OPT_FPF__UCODE_VERSION registers, which are an array. Our register - // definition macros do not allow us to manage them properly, so we need to hardcode their - // addresses for now. Clean this up once we support register arrays. + const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; // Each engine has 16 ucode version registers numbered from 1 to 16. - if ucode_id == 0 || ucode_id > 16 { - dev_err!(dev, "invalid ucode id {:#x}", ucode_id); - return Err(EINVAL); - } + let ucode_idx = match ucode_id { + 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + _ => { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + }; - // Base address of the FUSE registers array corresponding to the engine. - let reg_fuse_base = if engine_id_mask & 0x0001 != 0 { - regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::OFFSET + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid + // at build-time. + let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0004 != 0 { - regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0400 != 0 { - regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() } else { dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); return Err(EINVAL); }; - // Read `reg_fuse_base[ucode_id - 1]`. - let reg_fuse_version = - bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::())); - // TODO[NUMM]: replace with `last_set_bit` once it lands. - Ok(u32::BITS - reg_fuse_version.leading_zeros()) + Ok(u16::BITS - reg_fuse_version.leading_zeros()) } fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> Result { diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index c45de208055d..4ae2a91bcce8 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -182,15 +182,17 @@ pub(crate) fn vga_workspace_addr(self) -> Option { // FUSE -register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140 { +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0 { +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); From 0988099646cfc6c72a4448cad39d4ee22ad457a7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Jul 2025 16:26:24 +0900 Subject: [PATCH 23/64] gpu: nova-core: register: add support for relative array registers Add support for declaring arrays of registers available from a variable base. This is effectively a combination of the relative and array registers features. nova-core does not make much use of this yet, but it will become helpful to have for GSP boot. Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250718-nova-regs-v2-19-7b6a762aa1cd@nvidia.com Signed-off-by: Alexandre Courbot --- Documentation/gpu/nova/core/todo.rst | 1 - drivers/gpu/nova-core/falcon.rs | 2 +- drivers/gpu/nova-core/falcon/hal/ga102.rs | 2 +- drivers/gpu/nova-core/regs.rs | 8 +- drivers/gpu/nova-core/regs/macros.rs | 242 ++++++++++++++++++++++ 5 files changed, 248 insertions(+), 7 deletions(-) diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index a1d12c1b289d..48b20656dcb1 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -131,7 +131,6 @@ crate so it can be used by other components as well. Features desired before this happens: -* Arrays of registers with build-time index validation, * Make I/O optional I/O (for field values that are not registers), * Support other sizes than `u32`, * Allow visibility control for registers and individual fields, diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 2ecdcc6b9b9d..d235a6f9efca 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -516,7 +516,7 @@ fn dma_wr>( pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, |v| { + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 13c945fd6d6b..0b1cbe7853b3 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -73,7 +73,7 @@ fn signature_reg_fuse_version_ga102( fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> Result { regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() .set_value(params.pkc_data_offset) - .write(bar, &E::ID); + .write(bar, &E::ID, 0); regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() .set_value(u32::from(params.engine_id_mask)) .write(bar, &E::ID); diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 4ae2a91bcce8..206dab2e1335 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -289,8 +289,7 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 0:0 reset as bool; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600] { +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { 1:0 target as u8 ?=> FalconFbifTarget; 2:2 mem_type as bool => FalconFbifMemType; }); @@ -313,8 +312,9 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 31:0 value as u32; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210] { +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { 31:0 value as u32; }); diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index f38177065c25..754c14ee7f40 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -213,6 +213,74 @@ pub(crate) trait RegisterBase { /// # Ok(()) /// # } /// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // 64 per-cpu scratch registers, arranged as an contiguous array. +/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); +/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime value returns an error if it is out-of-bounds. +/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); +/// +/// // `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], +/// "Per-CPU firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], +/// "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], +/// "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { @@ -260,7 +328,41 @@ macro_rules! register { } ); }; + // Creates an array of registers at a relative offset from a base address provider. + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] + $(, $comment:literal)? { $($fields:tt)* } + ) => { + static_assert!(::core::mem::size_of::() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); + }; + + // Shortcut for contiguous array of relative registers (stride == size of element). + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::() ] ] + $(, $comment)? { $($fields)* } ); + }; + + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. + ( + $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); + }; + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + // This rule belongs to the (non-relative) register arrays set, but needs to be put last + // to avoid it being interpreted in place of the relative register array alias rule. ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { static_assert!($idx < $alias::SIZE); register!(@core $name $(, $comment)? { $($fields)* } ); @@ -715,4 +817,144 @@ pub(crate) fn try_alter( } } }; + + // Generates the IO accessors for an array of relative registers. + ( + @io_relative_array $name:ident @ $base:ty + [ $offset:literal [ $size:expr ; $stride:expr ] ] + ) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn read( + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize, + ) -> Self where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = >::BASE + + Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + #[inline(always)] + pub(crate) fn write( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize + ) where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = >::BASE + + Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + #[inline(always)] + pub(crate) fn alter( + io: &T, + base: &B, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base, idx)); + reg.write(io, base, idx); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read( + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(Self::read(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write( + self, + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(self.write(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_alter( + io: &T, + base: &B, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, base, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; } From d234f3aef5928c2f0d692cf43768ecced0abd672 Mon Sep 17 00:00:00 2001 From: Shankari Anand Date: Wed, 20 Aug 2025 16:58:46 +0530 Subject: [PATCH 24/64] gpu: nova-core: Update ARef imports from sync::aref Update call sites in nova-core to import `ARef` from `sync::aref` instead of `types`. This aligns with the ongoing effort to move `ARef` and `AlwaysRefCounted` to sync. [acourbot@nvidia.com: use standard prefix for nova-core.] Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1173 Signed-off-by: Shankari Anand Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250820112846.9665-1-shankari.ak0208@gmail.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/falcon.rs | 2 +- drivers/gpu/nova-core/fb.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index d235a6f9efca..645e65175cb6 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -7,8 +7,8 @@ use kernel::bindings; use kernel::device; use kernel::prelude::*; +use kernel::sync::aref::ARef; use kernel::time::Delta; -use kernel::types::ARef; use crate::dma::DmaObject; use crate::driver::Bar0; diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 4a702525fff4..68559902ae78 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -4,7 +4,7 @@ use kernel::prelude::*; use kernel::sizes::*; -use kernel::types::ARef; +use kernel::sync::aref::ARef; use kernel::{dev_warn, device}; use crate::dma::DmaObject; From 99e7f8e83a9c5a56ca20c4948b6126248c82c67d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 21 Aug 2025 13:49:32 +0900 Subject: [PATCH 25/64] gpu: nova-core: falcon: align DMA transfers to 256 bytes Falcon DMA transfers are done in 256 bytes increments, and the method responsible for initiating the transfer checked that the required length was indeed a multiple of 256. While correct, this also requires callers to specifically account for this limitation of DMA transfers, and we had for instance the fwsec code performing a seemingly arbitrary (and potentially overflowing) upwards alignment of the DMEM load size to match this requirement. Let's move that alignment into the loading code itself instead: since it is working in terms of number of transfers, we can turn this upwards alignment into a non-overflowing operation, and check that the requested transfer remains into the limits of the DMA object. This also allows us to remove a DMA-specific constant in the fwsec code. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250821-falcondma_256b-v2-1-83e8647a24b5@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 31 ++++++++++++++++++------- drivers/gpu/nova-core/firmware/fwsec.rs | 9 +------ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 645e65175cb6..be91aac6976a 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -463,14 +463,27 @@ fn dma_wr>( ); return Err(EINVAL); } - if load_offsets.len % DMA_LEN > 0 { - dev_err!( - self.dev, - "DMA transfer length must be a multiple of {}", - DMA_LEN - ); - return Err(EINVAL); - } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow"); + return Err(EOVERFLOW); + } + Some(upper_bound) if upper_bound as usize > fw.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); + return Err(EINVAL); + } + Some(_) => (), + }; // Set up the base source DMA address. @@ -486,7 +499,7 @@ fn dma_wr>( .set_imem(target_mem == FalconMem::Imem) .set_sec(if sec { 1 } else { 0 }); - for pos in (0..load_offsets.len).step_by(DMA_LEN as usize) { + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { // Perform a transfer of size `DMA_LEN`. regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() .set_offs(load_offsets.dst_start + pos) diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 0dff3cfa90af..47f5e4524072 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -202,9 +202,6 @@ pub(crate) struct FwsecFirmware { ucode: FirmwareDmaObject, } -// We need to load full DMEM pages. -const DMEM_LOAD_SIZE_ALIGN: u32 = 256; - impl FalconLoadParams for FwsecFirmware { fn imem_load_params(&self) -> FalconLoadTarget { FalconLoadTarget { @@ -218,11 +215,7 @@ fn dmem_load_params(&self) -> FalconLoadTarget { FalconLoadTarget { src_start: self.desc.imem_load_size, dst_start: self.desc.dmem_phys_base, - // TODO[NUMM]: replace with `align_up` once it lands. - len: self - .desc - .dmem_load_size - .next_multiple_of(DMEM_LOAD_SIZE_ALIGN), + len: self.desc.dmem_load_size, } } From 1db476d294c031c38d3e5cbc4d41faf6dfd77ffb Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 1 Aug 2025 22:24:09 +0900 Subject: [PATCH 26/64] rust: transmute: add `as_bytes` method for `AsBytes` trait Every type that implements `AsBytes` should be able to provide its byte representation. Introduce the `as_bytes` method that returns the implementer as a stream of bytes, and provide a default implementation that should be suitable for any type that satisfies `AsBytes`'s safety requirements. [acourbot@nvidia.com: use fully qualified `core::mem::size_of_val` to build with Rust 1.78.] Reviewed-by: Danilo Krummrich Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250801-as_bytes-v5-1-975f87d5dc85@nvidia.com Signed-off-by: Alexandre Courbot --- rust/kernel/transmute.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index 1c7d43771a37..a2d8f39182b2 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -47,7 +47,17 @@ macro_rules! impl_frombytes { /// /// Values of this type may not contain any uninitialized bytes. This type must not have interior /// mutability. -pub unsafe trait AsBytes {} +pub unsafe trait AsBytes { + /// Returns `self` as a slice of bytes. + fn as_bytes(&self) -> &[u8] { + // CAST: `Self` implements `AsBytes` thus all bytes of `self` are initialized. + let data = core::ptr::from_ref(self).cast::(); + let len = core::mem::size_of_val(self); + + // SAFETY: `data` is non-null and valid for reads of `len * sizeof::()` bytes. + unsafe { core::slice::from_raw_parts(data, len) } + } +} macro_rules! impl_asbytes { ($($({$($generics:tt)*})? $t:ty, )*) => { From 331c24e6ce814af2af74bac648d1ac1708873e9c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 1 Aug 2025 22:24:10 +0900 Subject: [PATCH 27/64] rust: transmute: add `as_bytes_mut` method to `AsBytes` trait Types that implement both `AsBytes` and `FromBytes` can be safely modified as a slice of bytes. Add a `as_bytes_mut` method for that purpose. [acourbot@nvidia.com: use fully qualified `core::mem::size_of_val` to build with Rust 1.78.] Reviewed-by: Alice Ryhl Reviewed-by: Danilo Krummrich Reviewed-by: Gary Guo Reviewed-by: Benno Lossin Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250801-as_bytes-v5-2-975f87d5dc85@nvidia.com Signed-off-by: Alexandre Courbot --- rust/kernel/transmute.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index a2d8f39182b2..517a432ac693 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -57,6 +57,21 @@ fn as_bytes(&self) -> &[u8] { // SAFETY: `data` is non-null and valid for reads of `len * sizeof::()` bytes. unsafe { core::slice::from_raw_parts(data, len) } } + + /// Returns `self` as a mutable slice of bytes. + fn as_bytes_mut(&mut self) -> &mut [u8] + where + Self: FromBytes, + { + // CAST: `Self` implements both `AsBytes` and `FromBytes` thus making `Self` + // bi-directionally transmutable to `[u8; size_of_val(self)]`. + let data = core::ptr::from_mut(self).cast::(); + let len = core::mem::size_of_val(self); + + // SAFETY: `data` is non-null and valid for read and writes of `len * sizeof::()` + // bytes. + unsafe { core::slice::from_raw_parts_mut(data, len) } + } } macro_rules! impl_asbytes { From 72031905cf2e36519a98b5becbe62dc10ebb4b97 Mon Sep 17 00:00:00 2001 From: "Christian S. Lima" Date: Sun, 24 Aug 2025 18:31:33 -0300 Subject: [PATCH 28/64] rust: transmute: Add methods for FromBytes trait The two methods added take a slice of bytes and return those bytes in a specific type. These methods are useful when we need to transform the stream of bytes into specific type. Since the `is_aligned` method for pointer types has been stabilized in `1.79` version and is being used in this patch, I'm enabling the feature. In this case, using this method is useful to check the alignment and avoid a giant boilerplate, such as `(foo.as_ptr() as usize) % core::mem::align_of::() == 0`. Also add `#[allow(clippy::incompatible_msrv)]` where needed until the MSRV is updated to `1.79`. Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/linux/issues/1119 Reviewed-by: Alice Ryhl Tested-by: Alexandre Courbot Reviewed-by: Alexandre Courbot Signed-off-by: Christian S. Lima Link: https://lore.kernel.org/r/20250824213134.27079-1-christiansantoslima21@gmail.com Acked-by: Miguel Ojeda [acourbot@nvidia.com: minor rewording of commit messages and doccomments] [acourbot@nvidia.com: revert slice implementation removal] [acourbot@nvidia.com: move incompatible_msrv clippy allow closer to site of need] [acourbot@nvidia.com: call the doctest method] Signed-off-by: Alexandre Courbot --- rust/kernel/lib.rs | 1 + rust/kernel/transmute.rs | 69 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index ed53169e795c..c859a8984bae 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -18,6 +18,7 @@ // // Stable since Rust 1.79.0. #![feature(inline_const)] +#![feature(pointer_is_aligned)] // // Stable since Rust 1.81.0. #![feature(lint_reasons)] diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index 517a432ac693..5fc7d4ed76b4 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -2,6 +2,8 @@ //! Traits for transmuting types. +use core::mem::size_of; + /// Types for which any bit pattern is valid. /// /// Not all types are valid for all values. For example, a `bool` must be either zero or one, so @@ -9,10 +11,75 @@ /// /// It's okay for the type to have padding, as initializing those bytes has no effect. /// +/// # Examples +/// +/// ``` +/// use kernel::transmute::FromBytes; +/// +/// # fn test() -> Option<()> { +/// let raw = [1, 2, 3, 4]; +/// +/// let result = u32::from_bytes(&raw)?; +/// +/// #[cfg(target_endian = "little")] +/// assert_eq!(*result, 0x4030201); +/// +/// #[cfg(target_endian = "big")] +/// assert_eq!(*result, 0x1020304); +/// +/// # Some(()) } +/// # test().ok_or(EINVAL)?; +/// # Ok::<(), Error>(()) +/// ``` +/// /// # Safety /// /// All bit-patterns must be valid for this type. This type must not have interior mutability. -pub unsafe trait FromBytes {} +pub unsafe trait FromBytes { + /// Converts a slice of bytes to a reference to `Self`. + /// + /// Succeeds if the reference is properly aligned, and the size of `bytes` is equal to that of + /// `T` and different from zero. + /// + /// Otherwise, returns [`None`]. + fn from_bytes(bytes: &[u8]) -> Option<&Self> + where + Self: Sized, + { + let slice_ptr = bytes.as_ptr().cast::(); + let size = size_of::(); + + #[allow(clippy::incompatible_msrv)] + if bytes.len() == size && slice_ptr.is_aligned() { + // SAFETY: Size and alignment were just checked. + unsafe { Some(&*slice_ptr) } + } else { + None + } + } + + /// Converts a mutable slice of bytes to a reference to `Self`. + /// + /// Succeeds if the reference is properly aligned, and the size of `bytes` is equal to that of + /// `T` and different from zero. + /// + /// Otherwise, returns [`None`]. + fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> + where + Self: AsBytes + Sized, + { + let slice_ptr = bytes.as_mut_ptr().cast::(); + let size = size_of::(); + + #[allow(clippy::incompatible_msrv)] + if bytes.len() == size && slice_ptr.is_aligned() { + // SAFETY: Size and alignment were just checked. + unsafe { Some(&mut *slice_ptr) } + } else { + None + } + } +} macro_rules! impl_frombytes { ($($({$($generics:tt)*})? $t:ty, )*) => { From 09f90256e8902793f594517ef440698585eb3595 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 26 Aug 2025 13:07:37 +0900 Subject: [PATCH 29/64] rust: transmute: add `from_bytes_copy` method to `FromBytes` trait `FromBytes::from_bytes` comes with a few practical limitations: - It requires the bytes slice to have the same alignment as the returned type, which might not be guaranteed in the case of a byte stream, - It returns a reference, requiring the returned type to implement `Clone` if one wants to keep the value for longer than the lifetime of the slice. To overcome these when needed, add a `from_bytes_copy` with a default implementation in the trait. `from_bytes_copy` returns an owned value that is populated using an unaligned read, removing the lifetime constraint and making it usable even on non-aligned byte slices. Reviewed-by: Alice Ryhl Acked-by: Miguel Ojeda Reviewed-by: John Hubbard Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250826-nova_firmware-v2-1-93566252fe3a@nvidia.com Signed-off-by: Alexandre Courbot --- rust/kernel/transmute.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index 5fc7d4ed76b4..cfc37d81adf2 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -79,6 +79,24 @@ fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self> None } } + + /// Creates an owned instance of `Self` by copying `bytes`. + /// + /// Unlike [`FromBytes::from_bytes`], which requires aligned input, this method can be used on + /// non-aligned data at the cost of a copy. + fn from_bytes_copy(bytes: &[u8]) -> Option + where + Self: Sized, + { + if bytes.len() == size_of::() { + // SAFETY: we just verified that `bytes` has the same size as `Self`, and per the + // invariants of `FromBytes`, any byte sequence of the correct length is a valid value + // for `Self`. + Some(unsafe { core::ptr::read_unaligned(bytes.as_ptr().cast::()) }) + } else { + None + } + } } macro_rules! impl_frombytes { From dff11511d1967b9fd82a5b4b60cf5045eaa545b6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 8 Aug 2025 11:46:41 +0900 Subject: [PATCH 30/64] gpu: nova-core: vbios: replace pci::Device with device::Device The passed pci::Device is exclusively used for logging purposes, so it can be replaced by a regular device::Device, which allows us to remove the `as_ref()` indirections at each logging site. Reviewed-by: Joel Fernandes Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250808-vbios_device-v1-1-834bbbab6471@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gpu.rs | 2 +- drivers/gpu/nova-core/vbios.rs | 135 ++++++++++++++------------------- 2 files changed, 57 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 8caecaf7dfb4..4d0f759610ec 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -298,7 +298,7 @@ pub(crate) fn new( let fb_layout = FbLayout::new(spec.chipset, bar)?; dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); - let bios = Vbios::new(pdev, bar)?; + let bios = Vbios::new(pdev.as_ref(), bar)?; Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 50fbea69fc00..ce32ab4cd5b2 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -8,7 +8,6 @@ use core::convert::TryFrom; use kernel::device; use kernel::error::Result; -use kernel::pci; use kernel::prelude::*; /// The offset of the VBIOS ROM in the BAR0 space. @@ -31,7 +30,7 @@ /// Vbios Reader for constructing the VBIOS data. struct VbiosIterator<'a> { - pdev: &'a pci::Device, + dev: &'a device::Device, bar0: &'a Bar0, /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference /// or copying into other data structures. It is the entire scanned contents of the VBIOS which @@ -46,9 +45,9 @@ struct VbiosIterator<'a> { } impl<'a> VbiosIterator<'a> { - fn new(pdev: &'a pci::Device, bar0: &'a Bar0) -> Result { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result { Ok(Self { - pdev, + dev, bar0, data: KVec::new(), current_offset: 0, @@ -64,7 +63,7 @@ fn read_more(&mut self, len: usize) -> Result { // Ensure length is a multiple of 4 for 32-bit reads if len % core::mem::size_of::() != 0 { dev_err!( - self.pdev.as_ref(), + self.dev, "VBIOS read length {} is not a multiple of 4\n", len ); @@ -89,7 +88,7 @@ fn read_more(&mut self, len: usize) -> Result { /// Read bytes at a specific offset, filling any gap. fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { if offset > BIOS_MAX_SCAN_LEN { - dev_err!(self.pdev.as_ref(), "Error: exceeded BIOS scan limit.\n"); + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); return Err(EINVAL); } @@ -115,7 +114,7 @@ fn read_bios_image_at_offset( if offset + len > data_len { self.read_more_at_offset(offset, len).inspect_err(|e| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to read more at offset {:#x}: {:?}\n", offset, e @@ -123,9 +122,9 @@ fn read_bios_image_at_offset( })?; } - BiosImage::new(self.pdev, &self.data[offset..offset + len]).inspect_err(|err| { + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to {} at offset {:#x}: {:?}\n", context, offset, @@ -146,10 +145,7 @@ fn next(&mut self) -> Option { } if self.current_offset > BIOS_MAX_SCAN_LEN { - dev_err!( - self.pdev.as_ref(), - "Error: exceeded BIOS scan limit, stopping scan\n" - ); + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); return None; } @@ -192,18 +188,18 @@ impl Vbios { /// Probe for VBIOS extraction. /// /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. - pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result { // Images to extract from iteration let mut pci_at_image: Option = None; let mut first_fwsec_image: Option = None; let mut second_fwsec_image: Option = None; // Parse all VBIOS images in the ROM - for image_result in VbiosIterator::new(pdev, bar0)? { + for image_result in VbiosIterator::new(dev, bar0)? { let full_image = image_result?; dev_dbg!( - pdev.as_ref(), + dev, "Found BIOS image: size: {:#x}, type: {}, last: {}\n", full_image.image_size_bytes(), full_image.image_type_str(), @@ -234,14 +230,14 @@ pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { (second_fwsec_image, first_fwsec_image, pci_at_image) { second - .setup_falcon_data(pdev, &pci_at, &first) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Falcon data setup failed: {:?}\n", e))?; + .setup_falcon_data(dev, &pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; Ok(Vbios { - fwsec_image: second.build(pdev)?, + fwsec_image: second.build(dev)?, }) } else { dev_err!( - pdev.as_ref(), + dev, "Missing required images for falcon data setup, skipping\n" ); Err(EINVAL) @@ -284,9 +280,9 @@ struct PcirStruct { } impl PcirStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Result { + fn new(dev: &device::Device, data: &[u8]) -> Result { if data.len() < core::mem::size_of::() { - dev_err!(pdev.as_ref(), "Not enough data for PcirStruct\n"); + dev_err!(dev, "Not enough data for PcirStruct\n"); return Err(EINVAL); } @@ -295,11 +291,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). if &signature != b"PCIR" && &signature != b"NPDS" { - dev_err!( - pdev.as_ref(), - "Invalid signature for PcirStruct: {:?}\n", - signature - ); + dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); return Err(EINVAL); } @@ -308,7 +300,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { let image_len = u16::from_le_bytes([data[16], data[17]]); if image_len == 0 { - dev_err!(pdev.as_ref(), "Invalid image length: 0\n"); + dev_err!(dev, "Invalid image length: 0\n"); return Err(EINVAL); } @@ -467,7 +459,7 @@ struct PciRomHeader { } impl PciRomHeader { - fn new(pdev: &pci::Device, data: &[u8]) -> Result { + fn new(dev: &device::Device, data: &[u8]) -> Result { if data.len() < 26 { // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. return Err(EINVAL); @@ -479,7 +471,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { match signature { 0xAA55 | 0xBB77 | 0x4E56 => {} _ => { - dev_err!(pdev.as_ref(), "ROM signature unknown {:#x}\n", signature); + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); return Err(EINVAL); } } @@ -538,9 +530,9 @@ struct NpdeStruct { } impl NpdeStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Option { + fn new(dev: &device::Device, data: &[u8]) -> Option { if data.len() < core::mem::size_of::() { - dev_dbg!(pdev.as_ref(), "Not enough data for NpdeStruct\n"); + dev_dbg!(dev, "Not enough data for NpdeStruct\n"); return None; } @@ -549,17 +541,13 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Option { // Signature should be "NPDE" (0x4544504E). if &signature != b"NPDE" { - dev_dbg!( - pdev.as_ref(), - "Invalid signature for NpdeStruct: {:?}\n", - signature - ); + dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); return None; } let subimage_len = u16::from_le_bytes([data[8], data[9]]); if subimage_len == 0 { - dev_dbg!(pdev.as_ref(), "Invalid subimage length: 0\n"); + dev_dbg!(dev, "Invalid subimage length: 0\n"); return None; } @@ -584,7 +572,7 @@ fn image_size_bytes(&self) -> usize { /// Try to find NPDE in the data, the NPDE is right after the PCIR. fn find_in_data( - pdev: &pci::Device, + dev: &device::Device, data: &[u8], rom_header: &PciRomHeader, pcir: &PcirStruct, @@ -596,12 +584,12 @@ fn find_in_data( // Check if we have enough data if npde_start + core::mem::size_of::() > data.len() { - dev_dbg!(pdev.as_ref(), "Not enough data for NPDE\n"); + dev_dbg!(dev, "Not enough data for NPDE\n"); return None; } // Try to create NPDE from the data - NpdeStruct::new(pdev, &data[npde_start..]) + NpdeStruct::new(dev, &data[npde_start..]) } } @@ -669,10 +657,10 @@ fn image_size_bytes(&self) -> usize { /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which /// triggers the constructor of the specific BiosImage enum variant. - fn new(pdev: &pci::Device, data: &[u8]) -> Result { - let base = BiosImageBase::new(pdev, data)?; + fn new(dev: &device::Device, data: &[u8]) -> Result { + let base = BiosImageBase::new(dev, data)?; let image = base.into_image().inspect_err(|e| { - dev_err!(pdev.as_ref(), "Failed to create BiosImage: {:?}\n", e); + dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); })?; Ok(image) @@ -773,16 +761,16 @@ fn into_image(self) -> Result { } /// Creates a new BiosImageBase from raw byte data. - fn new(pdev: &pci::Device, data: &[u8]) -> Result { + fn new(dev: &device::Device, data: &[u8]) -> Result { // Ensure we have enough data for the ROM header. if data.len() < 26 { - dev_err!(pdev.as_ref(), "Not enough data for ROM header\n"); + dev_err!(dev, "Not enough data for ROM header\n"); return Err(EINVAL); } // Parse the ROM header. - let rom_header = PciRomHeader::new(pdev, &data[0..26]) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PciRomHeader: {:?}\n", e))?; + let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; // Get the PCI Data Structure using the pointer from the ROM header. let pcir_offset = rom_header.pci_data_struct_offset as usize; @@ -791,22 +779,22 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { .ok_or(EINVAL) .inspect_err(|_| { dev_err!( - pdev.as_ref(), + dev, "PCIR offset {:#x} out of bounds (data length: {})\n", pcir_offset, data.len() ); dev_err!( - pdev.as_ref(), + dev, "Consider reading more data for construction of BiosImage\n" ); })?; - let pcir = PcirStruct::new(pdev, pcir_data) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PcirStruct: {:?}\n", e))?; + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; // Look for NPDE structure if this is not an NBSI image (type != 0x70). - let npde = NpdeStruct::find_in_data(pdev, data, &rom_header, &pcir); + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); // Create a copy of the data. let mut data_copy = KVec::new(); @@ -848,7 +836,7 @@ fn get_bit_token(&self, token_id: u8) -> Result { /// /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC /// image. - fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result { + fn falcon_data_ptr(&self, dev: &device::Device) -> Result { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds @@ -859,14 +847,14 @@ fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result { // read the 4 bytes at the offset specified in the token let offset = token.data_offset as usize; let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { - dev_err!(pdev.as_ref(), "Failed to convert data slice to array"); + dev_err!(dev, "Failed to convert data slice to array"); EINVAL })?; let data_ptr = u32::from_le_bytes(bytes); if (data_ptr as usize) < self.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon data pointer out of bounds\n"); + dev_err!(dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -928,7 +916,7 @@ struct PmuLookupTable { } impl PmuLookupTable { - fn new(pdev: &pci::Device, data: &[u8]) -> Result { + fn new(dev: &device::Device, data: &[u8]) -> Result { if data.len() < 4 { return Err(EINVAL); } @@ -940,10 +928,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { let required_bytes = header_len + (entry_count * entry_len); if data.len() < required_bytes { - dev_err!( - pdev.as_ref(), - "PmuLookupTable data length less than required\n" - ); + dev_err!(dev, "PmuLookupTable data length less than required\n"); return Err(EINVAL); } @@ -956,11 +941,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { // Debug logging of entries (dumps the table data to dmesg) for i in (header_len..required_bytes).step_by(entry_len) { - dev_dbg!( - pdev.as_ref(), - "PMU entry: {:02x?}\n", - &data[i..][..entry_len] - ); + dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); } Ok(PmuLookupTable { @@ -997,11 +978,11 @@ fn find_entry_by_type(&self, entry_type: u8) -> Result { impl FwSecBiosBuilder { fn setup_falcon_data( &mut self, - pdev: &pci::Device, + dev: &device::Device, pci_at_image: &PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr(pdev)? as usize; + let mut offset = pci_at_image.falcon_data_ptr(dev)? as usize; let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -1025,9 +1006,9 @@ fn setup_falcon_data( if pmu_in_first_fwsec { self.pmu_lookup_table = - Some(PmuLookupTable::new(pdev, &first_fwsec.base.data[offset..])?); + Some(PmuLookupTable::new(dev, &first_fwsec.base.data[offset..])?); } else { - self.pmu_lookup_table = Some(PmuLookupTable::new(pdev, &self.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new(dev, &self.base.data[offset..])?); } match self @@ -1040,18 +1021,14 @@ fn setup_falcon_data( let mut ucode_offset = entry.data as usize; ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon Ucode offset not in second Fwsec.\n"); + dev_err!(dev, "Falcon Ucode offset not in second Fwsec.\n"); return Err(EINVAL); } ucode_offset -= first_fwsec.base.data.len(); self.falcon_ucode_offset = Some(ucode_offset); } Err(e) => { - dev_err!( - pdev.as_ref(), - "PmuLookupTableEntry not found, error: {:?}\n", - e - ); + dev_err!(dev, "PmuLookupTableEntry not found, error: {:?}\n", e); return Err(EINVAL); } } @@ -1059,7 +1036,7 @@ fn setup_falcon_data( } /// Build the final FwSecBiosImage from this builder - fn build(self, pdev: &pci::Device) -> Result { + fn build(self, dev: &device::Device) -> Result { let ret = FwSecBiosImage { base: self.base, falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, @@ -1067,8 +1044,8 @@ fn build(self, pdev: &pci::Device) -> Result { if cfg!(debug_assertions) { // Print the desc header for debugging - let desc = ret.header(pdev.as_ref())?; - dev_dbg!(pdev.as_ref(), "PmuLookupTableEntry desc: {:#?}\n", desc); + let desc = ret.header(dev)?; + dev_dbg!(dev, "PmuLookupTableEntry desc: {:#?}\n", desc); } Ok(ret) From 93296e9d9528f0d87f2cf3fee494599060a0f14a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 8 Aug 2025 11:46:42 +0900 Subject: [PATCH 31/64] gpu: nova-core: vbios: store reference to Device where relevant Now that the vbios code uses a non-bound `Device` instance, store an `ARef` to it at construction time so we can use it for logging without having to carry an extra argument on every method for that sole purpose. Reviewed-by: Joel Fernandes Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250808-vbios_device-v1-2-834bbbab6471@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware/fwsec.rs | 8 +-- drivers/gpu/nova-core/vbios.rs | 69 +++++++++++++++---------- 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 47f5e4524072..8edbb5c0572c 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -246,8 +246,8 @@ impl FalconFirmware for FwsecFirmware { impl FirmwareDmaObject { fn new_fwsec(dev: &Device, bios: &Vbios, cmd: FwsecCommand) -> Result { - let desc = bios.fwsec_image().header(dev)?; - let ucode = bios.fwsec_image().ucode(dev, desc)?; + let desc = bios.fwsec_image().header()?; + let ucode = bios.fwsec_image().ucode(desc)?; let mut dma_object = DmaObject::from_data(dev, ucode)?; let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; @@ -336,7 +336,7 @@ pub(crate) fn new( let ucode_dma = FirmwareDmaObject::::new_fwsec(dev, bios, cmd)?; // Patch signature if needed. - let desc = bios.fwsec_image().header(dev)?; + let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count != 0 { let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; let desc_sig_versions = u32::from(desc.signature_versions); @@ -375,7 +375,7 @@ pub(crate) fn new( dev_dbg!(dev, "patching signature with index {}\n", signature_idx); let signature = bios .fwsec_image() - .sigs(dev, desc) + .sigs(desc) .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; ucode_dma.patch_signature(signature, sig_base_img)? diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index ce32ab4cd5b2..e6a060714205 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -9,6 +9,7 @@ use kernel::device; use kernel::error::Result; use kernel::prelude::*; +use kernel::types::ARef; /// The offset of the VBIOS ROM in the BAR0 space. const ROM_OFFSET: usize = 0x300000; @@ -230,10 +231,10 @@ pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result { (second_fwsec_image, first_fwsec_image, pci_at_image) { second - .setup_falcon_data(dev, &pci_at, &first) + .setup_falcon_data(&pci_at, &first) .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; Ok(Vbios { - fwsec_image: second.build(dev)?, + fwsec_image: second.build()?, }) } else { dev_err!( @@ -742,9 +743,10 @@ fn try_from(base: BiosImageBase) -> Result { /// /// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that /// Rust favors composition of types over inheritance. -#[derive(Debug)] #[expect(dead_code)] struct BiosImageBase { + /// Used for logging. + dev: ARef, /// PCI ROM Expansion Header rom_header: PciRomHeader, /// PCI Data Structure @@ -801,6 +803,7 @@ fn new(dev: &device::Device, data: &[u8]) -> Result { data_copy.extend_from_slice(data, GFP_KERNEL)?; Ok(BiosImageBase { + dev: dev.into(), rom_header, pcir, npde, @@ -836,7 +839,7 @@ fn get_bit_token(&self, token_id: u8) -> Result { /// /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC /// image. - fn falcon_data_ptr(&self, dev: &device::Device) -> Result { + fn falcon_data_ptr(&self) -> Result { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds @@ -847,14 +850,14 @@ fn falcon_data_ptr(&self, dev: &device::Device) -> Result { // read the 4 bytes at the offset specified in the token let offset = token.data_offset as usize; let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { - dev_err!(dev, "Failed to convert data slice to array"); + dev_err!(self.base.dev, "Failed to convert data slice to array"); EINVAL })?; let data_ptr = u32::from_le_bytes(bytes); if (data_ptr as usize) < self.base.data.len() { - dev_err!(dev, "Falcon data pointer out of bounds\n"); + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -978,11 +981,10 @@ fn find_entry_by_type(&self, entry_type: u8) -> Result { impl FwSecBiosBuilder { fn setup_falcon_data( &mut self, - dev: &device::Device, pci_at_image: &PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr(dev)? as usize; + let mut offset = pci_at_image.falcon_data_ptr()? as usize; let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -1005,10 +1007,15 @@ fn setup_falcon_data( self.falcon_data_offset = Some(offset); if pmu_in_first_fwsec { - self.pmu_lookup_table = - Some(PmuLookupTable::new(dev, &first_fwsec.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); } else { - self.pmu_lookup_table = Some(PmuLookupTable::new(dev, &self.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); } match self @@ -1021,14 +1028,18 @@ fn setup_falcon_data( let mut ucode_offset = entry.data as usize; ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { - dev_err!(dev, "Falcon Ucode offset not in second Fwsec.\n"); + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); return Err(EINVAL); } ucode_offset -= first_fwsec.base.data.len(); self.falcon_ucode_offset = Some(ucode_offset); } Err(e) => { - dev_err!(dev, "PmuLookupTableEntry not found, error: {:?}\n", e); + dev_err!( + self.base.dev, + "PmuLookupTableEntry not found, error: {:?}\n", + e + ); return Err(EINVAL); } } @@ -1036,7 +1047,7 @@ fn setup_falcon_data( } /// Build the final FwSecBiosImage from this builder - fn build(self, dev: &device::Device) -> Result { + fn build(self) -> Result { let ret = FwSecBiosImage { base: self.base, falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, @@ -1044,8 +1055,8 @@ fn build(self, dev: &device::Device) -> Result { if cfg!(debug_assertions) { // Print the desc header for debugging - let desc = ret.header(dev)?; - dev_dbg!(dev, "PmuLookupTableEntry desc: {:#?}\n", desc); + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); } Ok(ret) @@ -1054,13 +1065,16 @@ fn build(self, dev: &device::Device) -> Result { impl FwSecBiosImage { /// Get the FwSec header ([`FalconUCodeDescV3`]). - pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> { + pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> { // Get the falcon ucode offset that was found in setup_falcon_data. let falcon_ucode_offset = self.falcon_ucode_offset; // Make sure the offset is within the data bounds. if falcon_ucode_offset + core::mem::size_of::() > self.base.data.len() { - dev_err!(dev, "fwsec-frts header not contained within BIOS bounds\n"); + dev_err!( + self.base.dev, + "fwsec-frts header not contained within BIOS bounds\n" + ); return Err(ERANGE); } @@ -1072,7 +1086,7 @@ pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> let ver = (hdr & 0xff00) >> 8; if ver != 3 { - dev_err!(dev, "invalid fwsec firmware version: {:?}\n", ver); + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); return Err(EINVAL); } @@ -1092,7 +1106,7 @@ pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> } /// Get the ucode data as a byte slice - pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> { let falcon_ucode_offset = self.falcon_ucode_offset; // The ucode data follows the descriptor. @@ -1104,15 +1118,16 @@ pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Re .data .get(ucode_data_offset..ucode_data_offset + size) .ok_or(ERANGE) - .inspect_err(|_| dev_err!(dev, "fwsec ucode data not contained within BIOS bounds\n")) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) } /// Get the signatures as a byte slice - pub(crate) fn sigs( - &self, - dev: &device::Device, - desc: &FalconUCodeDescV3, - ) -> Result<&[Bcrt30Rsa3kSignature]> { + pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::(); let sigs_size = @@ -1121,7 +1136,7 @@ pub(crate) fn sigs( // Make sure the data is within bounds. if sigs_data_offset + sigs_size > self.base.data.len() { dev_err!( - dev, + self.base.dev, "fwsec signatures data not contained within BIOS bounds\n" ); return Err(ERANGE); From 467971a908761540cc60b4da22639c440dc05462 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:37 +0200 Subject: [PATCH 32/64] rust: page: implement BorrowedPage Currently, a Page always owns the underlying struct page. However, sometimes a struct page may be owned by some other entity, e.g. a vmalloc allocation. Hence, introduce BorrowedPage to support such cases, until the Ownable solution [1] lands. This is required by the scatterlist abstractions. Acked-by: Alice Ryhl Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/rust-for-linux/ZnCzLIly3DRK2eab@boqun-archlinux/ [1] Link: https://lore.kernel.org/r/20250820145434.94745-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/bindings/bindings_helper.h | 1 + rust/kernel/page.rs | 75 ++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 84d60635e8a9..0e140e07758b 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs index 7c1b17246ed5..631718a6ad7d 100644 --- a/rust/kernel/page.rs +++ b/rust/kernel/page.rs @@ -9,7 +9,12 @@ error::Result, uaccess::UserSliceReader, }; -use core::ptr::{self, NonNull}; +use core::{ + marker::PhantomData, + mem::ManuallyDrop, + ops::Deref, + ptr::{self, NonNull}, +}; /// A bitwise shift for the page size. pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize; @@ -30,6 +35,74 @@ pub const fn page_align(addr: usize) -> usize { (addr + (PAGE_SIZE - 1)) & PAGE_MASK } +/// Representation of a non-owning reference to a [`Page`]. +/// +/// This type provides a borrowed version of a [`Page`] that is owned by some other entity, e.g. a +/// [`Vmalloc`] allocation such as [`VBox`]. +/// +/// # Example +/// +/// ``` +/// # use kernel::{bindings, prelude::*}; +/// use kernel::page::{BorrowedPage, Page, PAGE_SIZE}; +/// # use core::{mem::MaybeUninit, ptr, ptr::NonNull }; +/// +/// fn borrow_page<'a>(vbox: &'a mut VBox>) -> BorrowedPage<'a> { +/// let ptr = ptr::from_ref(&**vbox); +/// +/// // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory. +/// let page = unsafe { bindings::vmalloc_to_page(ptr.cast()) }; +/// +/// // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid +/// // pointer to `Vmalloc` memory. +/// let page = unsafe { NonNull::new_unchecked(page) }; +/// +/// // SAFETY: +/// // - `self.0` is a valid pointer to a `struct page`. +/// // - `self.0` is valid for the entire lifetime of `self`. +/// unsafe { BorrowedPage::from_raw(page) } +/// } +/// +/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; +/// let page = borrow_page(&mut vbox); +/// +/// // SAFETY: There is no concurrent read or write to this page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +/// +/// # Invariants +/// +/// The borrowed underlying pointer to a `struct page` is valid for the entire lifetime `'a`. +/// +/// [`VBox`]: kernel::alloc::VBox +/// [`Vmalloc`]: kernel::alloc::allocator::Vmalloc +pub struct BorrowedPage<'a>(ManuallyDrop, PhantomData<&'a Page>); + +impl<'a> BorrowedPage<'a> { + /// Constructs a [`BorrowedPage`] from a raw pointer to a `struct page`. + /// + /// # Safety + /// + /// - `ptr` must point to a valid `bindings::page`. + /// - `ptr` must remain valid for the entire lifetime `'a`. + pub unsafe fn from_raw(ptr: NonNull) -> Self { + let page = Page { page: ptr }; + + // INVARIANT: The safety requirements guarantee that `ptr` is valid for the entire lifetime + // `'a`. + Self(ManuallyDrop::new(page), PhantomData) + } +} + +impl<'a> Deref for BorrowedPage<'a> { + type Target = Page; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + /// A pointer to a page that owns the page allocation. /// /// # Invariants From 8e92c9902ff11a1c2aab229a3d7d4c1d7e5b698f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:38 +0200 Subject: [PATCH 33/64] rust: alloc: vmalloc: implement Vmalloc::to_page() Implement an abstraction of vmalloc_to_page() for subsequent use in the AsPageIter implementation of VBox and VVec. Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Alice Ryhl Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/r/20250820145434.94745-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator.rs | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index aa2dfa9dca4c..2315f5063011 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -15,6 +15,7 @@ use crate::alloc::{AllocError, Allocator}; use crate::bindings; +use crate::page; use crate::pr_warn; /// The contiguous kernel allocator. @@ -140,6 +141,54 @@ unsafe fn realloc( } } +impl Vmalloc { + /// Convert a pointer to a [`Vmalloc`] allocation to a [`page::BorrowedPage`]. + /// + /// # Examples + /// + /// ``` + /// # use core::ptr::{NonNull, from_mut}; + /// # use kernel::{page, prelude::*}; + /// use kernel::alloc::allocator::Vmalloc; + /// + /// let mut vbox = VBox::<[u8; page::PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; + /// + /// { + /// // SAFETY: By the type invariant of `Box` the inner pointer of `vbox` is non-null. + /// let ptr = unsafe { NonNull::new_unchecked(from_mut(&mut *vbox)) }; + /// + /// // SAFETY: + /// // `ptr` is a valid pointer to a `Vmalloc` allocation. + /// // `ptr` is valid for the entire lifetime of `page`. + /// let page = unsafe { Vmalloc::to_page(ptr.cast()) }; + /// + /// // SAFETY: There is no concurrent read or write to the same page. + /// unsafe { page.fill_zero_raw(0, page::PAGE_SIZE)? }; + /// } + /// # Ok::<(), Error>(()) + /// ``` + /// + /// # Safety + /// + /// - `ptr` must be a valid pointer to a [`Vmalloc`] allocation. + /// - `ptr` must remain valid for the entire duration of `'a`. + pub unsafe fn to_page<'a>(ptr: NonNull) -> page::BorrowedPage<'a> { + // SAFETY: `ptr` is a valid pointer to `Vmalloc` memory. + let page = unsafe { bindings::vmalloc_to_page(ptr.as_ptr().cast()) }; + + // SAFETY: `vmalloc_to_page` returns a valid pointer to a `struct page` for a valid pointer + // to `Vmalloc` memory. + let page = unsafe { NonNull::new_unchecked(page) }; + + // SAFETY: + // - `page` is a valid pointer to a `struct page`, given that by the safety requirements of + // this function `ptr` is a valid pointer to a `Vmalloc` allocation. + // - By the safety requirements of this function `ptr` is valid for the entire lifetime of + // `'a`. + unsafe { page::BorrowedPage::from_raw(page) } + } +} + // SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that // - memory remains valid until it is explicitly freed, // - passing a pointer to a valid memory allocation is OK, From 7937dca770393d34d1ad217580c5446d2e45417f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:39 +0200 Subject: [PATCH 34/64] rust: alloc: implement VmallocPageIter Introduce the VmallocPageIter type; an instance of VmallocPageIter may be exposed by owners of vmalloc allocations to provide borrowed access to its backing pages. For instance, this is useful to access and borrow the backing pages of allocation primitives, such as Box and Vec, backing a scatterlist. Reviewed-by: Daniel Almeida Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Alice Ryhl Suggested-by: Alice Ryhl Link: https://lore.kernel.org/r/20250820145434.94745-4-dakr@kernel.org [ Drop VmallocPageIter::base_address(), move to allocator/iter.rs and stub VmallocPageIter for allocator_test.rs. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/allocator.rs | 3 + rust/kernel/alloc/allocator/iter.rs | 102 ++++++++++++++++++++++++++++ rust/kernel/alloc/allocator_test.rs | 29 ++++++++ 3 files changed, 134 insertions(+) create mode 100644 rust/kernel/alloc/allocator/iter.rs diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs index 2315f5063011..90e03ad15760 100644 --- a/rust/kernel/alloc/allocator.rs +++ b/rust/kernel/alloc/allocator.rs @@ -18,6 +18,9 @@ use crate::page; use crate::pr_warn; +mod iter; +pub use self::iter::VmallocPageIter; + /// The contiguous kernel allocator. /// /// `Kmalloc` is typically used for physically contiguous allocations up to page size, but also diff --git a/rust/kernel/alloc/allocator/iter.rs b/rust/kernel/alloc/allocator/iter.rs new file mode 100644 index 000000000000..5759f86029b7 --- /dev/null +++ b/rust/kernel/alloc/allocator/iter.rs @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +use super::Vmalloc; +use crate::page; +use core::marker::PhantomData; +use core::ptr::NonNull; + +/// An [`Iterator`] of [`page::BorrowedPage`] items owned by a [`Vmalloc`] allocation. +/// +/// # Guarantees +/// +/// The pages iterated by the [`Iterator`] appear in the order as they are mapped in the CPU's +/// virtual address space ascendingly. +/// +/// # Invariants +/// +/// - `buf` is a valid and [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation. +/// - `size` is the number of bytes from `buf` until the end of the [`Vmalloc`] allocation `buf` +/// points to. +pub struct VmallocPageIter<'a> { + /// The base address of the [`Vmalloc`] buffer. + buf: NonNull, + /// The size of the buffer pointed to by `buf` in bytes. + size: usize, + /// The current page index of the [`Iterator`]. + index: usize, + _p: PhantomData>, +} + +impl<'a> Iterator for VmallocPageIter<'a> { + type Item = page::BorrowedPage<'a>; + + fn next(&mut self) -> Option { + let offset = self.index.checked_mul(page::PAGE_SIZE)?; + + // Even though `self.size()` may be smaller than `Self::page_count() * page::PAGE_SIZE`, it + // is always a number between `(Self::page_count() - 1) * page::PAGE_SIZE` and + // `Self::page_count() * page::PAGE_SIZE`, hence the check below is sufficient. + if offset < self.size() { + self.index += 1; + } else { + return None; + } + + // TODO: Use `NonNull::add()` instead, once the minimum supported compiler version is + // bumped to 1.80 or later. + // + // SAFETY: `offset` is in the interval `[0, (self.page_count() - 1) * page::PAGE_SIZE]`, + // hence the resulting pointer is guaranteed to be within the same allocation. + let ptr = unsafe { self.buf.as_ptr().add(offset) }; + + // SAFETY: `ptr` is guaranteed to be non-null given that it is derived from `self.buf`. + let ptr = unsafe { NonNull::new_unchecked(ptr) }; + + // SAFETY: + // - `ptr` is a valid pointer to a `Vmalloc` allocation. + // - `ptr` is valid for the duration of `'a`. + Some(unsafe { Vmalloc::to_page(ptr) }) + } + + fn size_hint(&self) -> (usize, Option) { + let remaining = self.page_count().saturating_sub(self.index); + + (remaining, Some(remaining)) + } +} + +impl<'a> VmallocPageIter<'a> { + /// Creates a new [`VmallocPageIter`] instance. + /// + /// # Safety + /// + /// - `buf` must be a [`page::PAGE_SIZE`] aligned pointer into a [`Vmalloc`] allocation. + /// - `buf` must be valid for at least the lifetime of `'a`. + /// - `size` must be the number of bytes from `buf` until the end of the [`Vmalloc`] allocation + /// `buf` points to. + pub unsafe fn new(buf: NonNull, size: usize) -> Self { + // INVARIANT: By the safety requirements, `buf` is a valid and `page::PAGE_SIZE` aligned + // pointer into a [`Vmalloc`] allocation. + Self { + buf, + size, + index: 0, + _p: PhantomData, + } + } + + /// Returns the size of the backing [`Vmalloc`] allocation in bytes. + /// + /// Note that this is the size the [`Vmalloc`] allocation has been allocated with. Hence, this + /// number may be smaller than `[`Self::page_count`] * [`page::PAGE_SIZE`]`. + #[inline] + pub fn size(&self) -> usize { + self.size + } + + /// Returns the number of pages owned by the backing [`Vmalloc`] allocation. + #[inline] + pub fn page_count(&self) -> usize { + self.size().div_ceil(page::PAGE_SIZE) + } +} diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index a3074480bd8d..f46b4b671389 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -12,8 +12,10 @@ use super::{flags::*, AllocError, Allocator, Flags}; use core::alloc::Layout; use core::cmp; +use core::marker::PhantomData; use core::ptr; use core::ptr::NonNull; +use kernel::page; /// The userspace allocator based on libc. pub struct Cmalloc; @@ -22,6 +24,33 @@ pub type Vmalloc = Kmalloc; pub type KVmalloc = Kmalloc; +pub struct VmallocPageIter<'a> { + _p: PhantomData>, +} + +impl<'a> Iterator for VmallocPageIter<'a> { + type Item = page::BorrowedPage<'a>; + + fn next(&mut self) -> Option { + None + } +} + +impl<'a> VmallocPageIter<'a> { + #[allow(clippy::missing_safety_doc)] + pub unsafe fn new(_buf: NonNull, _size: usize) -> Self { + Self { _p: PhantomData } + } + + pub fn size(&self) -> usize { + 0 + } + + pub fn page_count(&self) -> usize { + 0 + } +} + extern "C" { #[link_name = "aligned_alloc"] fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void; From 866ec3bab19c33a66ad384bc69dff1cd83d7ed12 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:40 +0200 Subject: [PATCH 35/64] rust: page: define trait AsPageIter The AsPageIter trait provides a common interface for types that provide a page iterator, such as VmallocPageIter. Subsequent patches will leverage this to let VBox and VVec provide a VmallocPageIter though this trait. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/r/20250820145434.94745-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/page.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs index 631718a6ad7d..75ef096075cb 100644 --- a/rust/kernel/page.rs +++ b/rust/kernel/page.rs @@ -103,6 +103,18 @@ fn deref(&self) -> &Self::Target { } } +/// Trait to be implemented by types which provide an [`Iterator`] implementation of +/// [`BorrowedPage`] items, such as [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter). +pub trait AsPageIter { + /// The [`Iterator`] type, e.g. [`VmallocPageIter`](kernel::alloc::allocator::VmallocPageIter). + type Iter<'a>: Iterator> + where + Self: 'a; + + /// Returns an [`Iterator`] of [`BorrowedPage`] items over all pages owned by `self`. + fn page_iter(&mut self) -> Self::Iter<'_>; +} + /// A pointer to a page that owns the page allocation. /// /// # Invariants From 671618432f46c553d9b82ad701ac054e16c8d55a Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:41 +0200 Subject: [PATCH 36/64] rust: alloc: kbox: implement AsPageIter for VBox Implement AsPageIter for VBox; this allows to iterate and borrow the backing pages of a VBox. This, for instance, is useful in combination with VBox backing a scatterlist. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250820145434.94745-6-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kbox.rs | 40 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs index 856d05aa60f1..b69ff4a1d748 100644 --- a/rust/kernel/alloc/kbox.rs +++ b/rust/kernel/alloc/kbox.rs @@ -3,7 +3,7 @@ //! Implementation of [`Box`]. #[allow(unused_imports)] // Used in doc comments. -use super::allocator::{KVmalloc, Kmalloc, Vmalloc}; +use super::allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter}; use super::{AllocError, Allocator, Flags}; use core::alloc::Layout; use core::borrow::{Borrow, BorrowMut}; @@ -18,6 +18,7 @@ use crate::ffi::c_void; use crate::init::InPlaceInit; +use crate::page::AsPageIter; use crate::types::ForeignOwnable; use pin_init::{InPlaceWrite, Init, PinInit, ZeroableOption}; @@ -598,3 +599,40 @@ fn drop(&mut self) { unsafe { A::free(self.0.cast(), layout) }; } } + +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::alloc::allocator::VmallocPageIter; +/// use kernel::page::{AsPageIter, PAGE_SIZE}; +/// +/// let mut vbox = VBox::new((), GFP_KERNEL)?; +/// +/// assert!(vbox.page_iter().next().is_none()); +/// +/// let mut vbox = VBox::<[u8; PAGE_SIZE]>::new_uninit(GFP_KERNEL)?; +/// +/// let page = vbox.page_iter().next().expect("At least one page should be available.\n"); +/// +/// // SAFETY: There is no concurrent read or write to the same page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +impl AsPageIter for VBox { + type Iter<'a> + = VmallocPageIter<'a> + where + T: 'a; + + fn page_iter(&mut self) -> Self::Iter<'_> { + let ptr = self.0.cast(); + let size = core::mem::size_of::(); + + // SAFETY: + // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation. + // - `ptr` is guaranteed to be valid for the lifetime of `'a`. + // - `size` is the size of the `Vmalloc` allocation `ptr` points to. + unsafe { VmallocPageIter::new(ptr, size) } + } +} From 9acb4e630c3f3fc070b9962683fdde5ba1c4c70c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:42 +0200 Subject: [PATCH 37/64] rust: alloc: layout: implement ArrayLayout::size() Provide a convenience method for ArrayLayout to calculate the size of the ArrayLayout in bytes. Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Alice Ryhl Reviewed-by: Abdiel Janulgue Link: https://lore.kernel.org/r/20250820145434.94745-7-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/layout.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rust/kernel/alloc/layout.rs b/rust/kernel/alloc/layout.rs index 93ed514f7cc7..666accb7859c 100644 --- a/rust/kernel/alloc/layout.rs +++ b/rust/kernel/alloc/layout.rs @@ -98,6 +98,11 @@ pub const fn len(&self) -> usize { pub const fn is_empty(&self) -> bool { self.len == 0 } + + /// Returns the size of the [`ArrayLayout`] in bytes. + pub const fn size(&self) -> usize { + self.len() * core::mem::size_of::() + } } impl From> for Layout { From 779db37373a38f23e2534c7f4ffe8188f5237988 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 20 Aug 2025 16:53:43 +0200 Subject: [PATCH 38/64] rust: alloc: kvec: implement AsPageIter for VVec Implement AsPageIter for VVec; this allows to iterate and borrow the backing pages of a VVec. This, for instance, is useful in combination with VVec backing a scatterlist. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Daniel Almeida Link: https://lore.kernel.org/r/20250820145434.94745-8-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/alloc/kvec.rs | 40 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 3c72e0bdddb8..ac438e70a1ed 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -3,10 +3,11 @@ //! Implementation of [`Vec`]. use super::{ - allocator::{KVmalloc, Kmalloc, Vmalloc}, + allocator::{KVmalloc, Kmalloc, Vmalloc, VmallocPageIter}, layout::ArrayLayout, AllocError, Allocator, Box, Flags, }; +use crate::page::AsPageIter; use core::{ borrow::{Borrow, BorrowMut}, fmt, @@ -1017,6 +1018,43 @@ fn into_iter(self) -> Self::IntoIter { } } +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::alloc::allocator::VmallocPageIter; +/// use kernel::page::{AsPageIter, PAGE_SIZE}; +/// +/// let mut vec = VVec::::new(); +/// +/// assert!(vec.page_iter().next().is_none()); +/// +/// vec.reserve(PAGE_SIZE, GFP_KERNEL)?; +/// +/// let page = vec.page_iter().next().expect("At least one page should be available.\n"); +/// +/// // SAFETY: There is no concurrent read or write to the same page. +/// unsafe { page.fill_zero_raw(0, PAGE_SIZE)? }; +/// # Ok::<(), Error>(()) +/// ``` +impl AsPageIter for VVec { + type Iter<'a> + = VmallocPageIter<'a> + where + T: 'a; + + fn page_iter(&mut self) -> Self::Iter<'_> { + let ptr = self.ptr.cast(); + let size = self.layout.size(); + + // SAFETY: + // - `ptr` is a valid pointer to the beginning of a `Vmalloc` allocation. + // - `ptr` is guaranteed to be valid for the lifetime of `'a`. + // - `size` is the size of the `Vmalloc` allocation `ptr` points to. + unsafe { VmallocPageIter::new(ptr, size) } + } +} + /// An [`Iterator`] implementation for [`Vec`] that moves elements out of a vector. /// /// This structure is created by the [`Vec::into_iter`] method on [`Vec`] (provided by the From c2437c43cfb1f7f461669ba3cd74936fe345640b Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 28 Aug 2025 15:32:14 +0200 Subject: [PATCH 39/64] rust: dma: implement DataDirection Add the `DataDirection` struct, a newtype wrapper around the C `enum dma_data_direction`. This provides a type-safe Rust interface for specifying the direction of DMA transfers. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Link: https://lore.kernel.org/r/20250828133323.53311-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/bindings/bindings_helper.h | 1 + rust/kernel/dma.rs | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 0e140e07758b..c2cc52ee9945 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 2bc8ab51ec28..27b25f041f32 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -244,6 +244,74 @@ pub mod attrs { pub const DMA_ATTR_PRIVILEGED: Attrs = Attrs(bindings::DMA_ATTR_PRIVILEGED); } +/// DMA data direction. +/// +/// Corresponds to the C [`enum dma_data_direction`]. +/// +/// [`enum dma_data_direction`]: srctree/include/linux/dma-direction.h +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(u32)] +pub enum DataDirection { + /// The DMA mapping is for bidirectional data transfer. + /// + /// This is used when the buffer can be both read from and written to by the device. + /// The cache for the corresponding memory region is both flushed and invalidated. + Bidirectional = Self::const_cast(bindings::dma_data_direction_DMA_BIDIRECTIONAL), + + /// The DMA mapping is for data transfer from memory to the device (write). + /// + /// The CPU has prepared data in the buffer, and the device will read it. + /// The cache for the corresponding memory region is flushed before device access. + ToDevice = Self::const_cast(bindings::dma_data_direction_DMA_TO_DEVICE), + + /// The DMA mapping is for data transfer from the device to memory (read). + /// + /// The device will write data into the buffer for the CPU to read. + /// The cache for the corresponding memory region is invalidated before CPU access. + FromDevice = Self::const_cast(bindings::dma_data_direction_DMA_FROM_DEVICE), + + /// The DMA mapping is not for data transfer. + /// + /// This is primarily for debugging purposes. With this direction, the DMA mapping API + /// will not perform any cache coherency operations. + None = Self::const_cast(bindings::dma_data_direction_DMA_NONE), +} + +impl DataDirection { + /// Casts the bindgen-generated enum type to a `u32` at compile time. + /// + /// This function will cause a compile-time error if the underlying value of the + /// C enum is out of bounds for `u32`. + const fn const_cast(val: bindings::dma_data_direction) -> u32 { + // CAST: The C standard allows compilers to choose different integer types for enums. + // To safely check the value, we cast it to a wide signed integer type (`i128`) + // which can hold any standard C integer enum type without truncation. + let wide_val = val as i128; + + // Check if the value is outside the valid range for the target type `u32`. + // CAST: `u32::MAX` is cast to `i128` to match the type of `wide_val` for the comparison. + if wide_val < 0 || wide_val > u32::MAX as i128 { + // Trigger a compile-time error in a const context. + build_error!("C enum value is out of bounds for the target type `u32`."); + } + + // CAST: This cast is valid because the check above guarantees that `wide_val` + // is within the representable range of `u32`. + wide_val as u32 + } +} + +impl From for bindings::dma_data_direction { + /// Returns the raw representation of [`enum dma_data_direction`]. + fn from(direction: DataDirection) -> Self { + // CAST: `direction as u32` gets the underlying representation of our `#[repr(u32)]` enum. + // The subsequent cast to `Self` (the bindgen type) assumes the C enum is compatible + // with the enum variants of `DataDirection`, which is a valid assumption given our + // compile-time checks. + direction as u32 as Self + } +} + /// An abstraction of the `dma_alloc_coherent` API. /// /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map From c7081ec661bd2e9bfabe665b72c91698c396792c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 28 Aug 2025 15:32:15 +0200 Subject: [PATCH 40/64] rust: dma: add type alias for bindings::dma_addr_t Add a type alias for bindings::dma_addr_t (DmaAddress), such that we do not have to access bindings directly. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Suggested-by: Alice Ryhl Link: https://lore.kernel.org/r/20250828133323.53311-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 4 ++-- rust/kernel/dma.rs | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index be91aac6976a..2dbcdf26697b 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -4,8 +4,8 @@ use core::ops::Deref; use hal::FalconHal; -use kernel::bindings; use kernel::device; +use kernel::dma::DmaAddress; use kernel::prelude::*; use kernel::sync::aref::ARef; use kernel::time::Delta; @@ -455,7 +455,7 @@ fn dma_wr>( fw.dma_handle_with_offset(load_offsets.src_start as usize)?, ), }; - if dma_start % bindings::dma_addr_t::from(DMA_LEN) > 0 { + if dma_start % DmaAddress::from(DMA_LEN) > 0 { dev_err!( self.dev, "DMA transfer start addresses must be a multiple of {}", diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 27b25f041f32..b2a6282876da 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -13,6 +13,16 @@ types::ARef, }; +/// DMA address type. +/// +/// Represents a bus address used for Direct Memory Access (DMA) operations. +/// +/// This is an alias of the kernel's `dma_addr_t`, which may be `u32` or `u64` depending on +/// `CONFIG_ARCH_DMA_ADDR_T_64BIT`. +/// +/// Note that this may be `u64` even on 32-bit architectures. +pub type DmaAddress = bindings::dma_addr_t; + /// Trait to be implemented by DMA capable bus devices. /// /// The [`dma::Device`](Device) trait should be implemented by bus specific device representations, @@ -343,7 +353,7 @@ fn from(direction: DataDirection) -> Self { // entire `CoherentAllocation` including the allocated memory itself. pub struct CoherentAllocation { dev: ARef, - dma_handle: bindings::dma_addr_t, + dma_handle: DmaAddress, count: usize, cpu_addr: *mut T, dma_attrs: Attrs, @@ -444,7 +454,7 @@ pub fn start_ptr_mut(&mut self) -> *mut T { /// Returns a DMA handle which may be given to the device as the DMA address base of /// the region. - pub fn dma_handle(&self) -> bindings::dma_addr_t { + pub fn dma_handle(&self) -> DmaAddress { self.dma_handle } @@ -452,13 +462,13 @@ pub fn dma_handle(&self) -> bindings::dma_addr_t { /// device as the DMA address base of the region. /// /// Returns `EINVAL` if `offset` is not within the bounds of the allocation. - pub fn dma_handle_with_offset(&self, offset: usize) -> Result { + pub fn dma_handle_with_offset(&self, offset: usize) -> Result { if offset >= self.count { Err(EINVAL) } else { // INVARIANT: The type invariant of `Self` guarantees that `size_of:: * count` fits // into a `usize`, and `offset` is inferior to `count`. - Ok(self.dma_handle + (offset * core::mem::size_of::()) as bindings::dma_addr_t) + Ok(self.dma_handle + (offset * core::mem::size_of::()) as DmaAddress) } } From 05aa6fb1c21d7fb9df735da24096d793223789d5 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 28 Aug 2025 15:32:16 +0200 Subject: [PATCH 41/64] rust: scatterlist: Add abstraction for sg_table Add a safe Rust abstraction for the kernel's scatter-gather list facilities (`struct scatterlist` and `struct sg_table`). This commit introduces `SGTable`, a wrapper that uses a generic parameter to provide compile-time guarantees about ownership and lifetime. The abstraction provides two primary states: - `SGTable>`: Represents a table whose resources are fully managed by Rust. It takes ownership of a page provider `P`, allocates the underlying `struct sg_table`, maps it for DMA, and handles all cleanup automatically upon drop. The DMA mapping's lifetime is tied to the associated device using `Devres`, ensuring it is correctly unmapped before the device is unbound. - `SGTable` (or just `SGTable`): A zero-cost representation of an externally managed `struct sg_table`. It is created from a raw pointer using `SGTable::from_raw()` and provides a lifetime-bound reference (`&'a SGTable`) for operations like iteration. The API exposes a safe iterator that yields `&SGEntry` references, allowing drivers to easily access the DMA address and length of each segment in the list. Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Co-developed-by: Abdiel Janulgue Signed-off-by: Abdiel Janulgue Link: https://lore.kernel.org/r/20250828133323.53311-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/helpers/helpers.c | 1 + rust/helpers/scatterlist.c | 24 ++ rust/kernel/lib.rs | 1 + rust/kernel/scatterlist.rs | 491 +++++++++++++++++++++++++++++++++++++ 4 files changed, 517 insertions(+) create mode 100644 rust/helpers/scatterlist.c create mode 100644 rust/kernel/scatterlist.rs diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 7cf7fe95e41d..e94542bf6ea7 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -39,6 +39,7 @@ #include "rcu.c" #include "refcount.c" #include "regulator.c" +#include "scatterlist.c" #include "security.c" #include "signal.c" #include "slab.c" diff --git a/rust/helpers/scatterlist.c b/rust/helpers/scatterlist.c new file mode 100644 index 000000000000..80c956ee09ab --- /dev/null +++ b/rust/helpers/scatterlist.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +dma_addr_t rust_helper_sg_dma_address(struct scatterlist *sg) +{ + return sg_dma_address(sg); +} + +unsigned int rust_helper_sg_dma_len(struct scatterlist *sg) +{ + return sg_dma_len(sg); +} + +struct scatterlist *rust_helper_sg_next(struct scatterlist *sg) +{ + return sg_next(sg); +} + +void rust_helper_dma_unmap_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + return dma_unmap_sgtable(dev, sgt, dir, attrs); +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index c859a8984bae..fcffc3988a90 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -114,6 +114,7 @@ pub mod rbtree; pub mod regulator; pub mod revocable; +pub mod scatterlist; pub mod security; pub mod seq_file; pub mod sizes; diff --git a/rust/kernel/scatterlist.rs b/rust/kernel/scatterlist.rs new file mode 100644 index 000000000000..9709dff60b5a --- /dev/null +++ b/rust/kernel/scatterlist.rs @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Abstractions for scatter-gather lists. +//! +//! C header: [`include/linux/scatterlist.h`](srctree/include/linux/scatterlist.h) +//! +//! Scatter-gather (SG) I/O is a memory access technique that allows devices to perform DMA +//! operations on data buffers that are not physically contiguous in memory. It works by creating a +//! "scatter-gather list", an array where each entry specifies the address and length of a +//! physically contiguous memory segment. +//! +//! The device's DMA controller can then read this list and process the segments sequentially as +//! part of one logical I/O request. This avoids the need for a single, large, physically contiguous +//! memory buffer, which can be difficult or impossible to allocate. +//! +//! This module provides safe Rust abstractions over the kernel's `struct scatterlist` and +//! `struct sg_table` types. +//! +//! The main entry point is the [`SGTable`] type, which represents a complete scatter-gather table. +//! It can be either: +//! +//! - An owned table ([`SGTable>`]), created from a Rust memory buffer (e.g., [`VVec`]). +//! This type manages the allocation of the `struct sg_table`, the DMA mapping of the buffer, and +//! the automatic cleanup of all resources. +//! - A borrowed reference (&[`SGTable`]), which provides safe, read-only access to a table that was +//! allocated by other (e.g., C) code. +//! +//! Individual entries in the table are represented by [`SGEntry`], which can be accessed by +//! iterating over an [`SGTable`]. + +use crate::{ + alloc, + alloc::allocator::VmallocPageIter, + bindings, + device::{Bound, Device}, + devres::Devres, + dma, error, + io::resource::ResourceSize, + page, + prelude::*, + types::{ARef, Opaque}, +}; +use core::{ops::Deref, ptr::NonNull}; + +/// A single entry in a scatter-gather list. +/// +/// An `SGEntry` represents a single, physically contiguous segment of memory that has been mapped +/// for DMA. +/// +/// Instances of this struct are obtained by iterating over an [`SGTable`]. Drivers do not create +/// or own [`SGEntry`] objects directly. +#[repr(transparent)] +pub struct SGEntry(Opaque); + +// SAFETY: `SGEntry` can be sent to any task. +unsafe impl Send for SGEntry {} + +// SAFETY: `SGEntry` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for SGEntry {} + +impl SGEntry { + /// Convert a raw `struct scatterlist *` to a `&'a SGEntry`. + /// + /// # Safety + /// + /// Callers must ensure that the `struct scatterlist` pointed to by `ptr` is valid for the + /// lifetime `'a`. + #[inline] + unsafe fn from_raw<'a>(ptr: *mut bindings::scatterlist) -> &'a Self { + // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer + // to a `struct scatterlist` for the duration of `'a`. + unsafe { &*ptr.cast() } + } + + /// Obtain the raw `struct scatterlist *`. + #[inline] + fn as_raw(&self) -> *mut bindings::scatterlist { + self.0.get() + } + + /// Returns the DMA address of this SG entry. + /// + /// This is the address that the device should use to access the memory segment. + #[inline] + pub fn dma_address(&self) -> dma::DmaAddress { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`. + unsafe { bindings::sg_dma_address(self.as_raw()) } + } + + /// Returns the length of this SG entry in bytes. + #[inline] + pub fn dma_len(&self) -> ResourceSize { + #[allow(clippy::useless_conversion)] + // SAFETY: `self.as_raw()` is a valid pointer to a `struct scatterlist`. + unsafe { bindings::sg_dma_len(self.as_raw()) }.into() + } +} + +/// The borrowed generic type of an [`SGTable`], representing a borrowed or externally managed +/// table. +#[repr(transparent)] +pub struct Borrowed(Opaque); + +// SAFETY: `Borrowed` can be sent to any task. +unsafe impl Send for Borrowed {} + +// SAFETY: `Borrowed` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for Borrowed {} + +/// A scatter-gather table. +/// +/// This struct is a wrapper around the kernel's `struct sg_table`. It manages a list of DMA-mapped +/// memory segments that can be passed to a device for I/O operations. +/// +/// The generic parameter `T` is used as a generic type to distinguish between owned and borrowed +/// tables. +/// +/// - [`SGTable`]: An owned table created and managed entirely by Rust code. It handles +/// allocation, DMA mapping, and cleanup of all associated resources. See [`SGTable::new`]. +/// - [`SGTable`} (or simply [`SGTable`]): Represents a table whose lifetime is managed +/// externally. It can be used safely via a borrowed reference `&'a SGTable`, where `'a` is the +/// external lifetime. +/// +/// All [`SGTable`] variants can be iterated over the individual [`SGEntry`]s. +#[repr(transparent)] +#[pin_data] +pub struct SGTable { + #[pin] + inner: T, +} + +impl SGTable { + /// Creates a borrowed `&'a SGTable` from a raw `struct sg_table` pointer. + /// + /// This allows safe access to an `sg_table` that is managed elsewhere (for example, in C code). + /// + /// # Safety + /// + /// Callers must ensure that: + /// + /// - the `struct sg_table` pointed to by `ptr` is valid for the entire lifetime of `'a`, + /// - the data behind `ptr` is not modified concurrently for the duration of `'a`. + #[inline] + pub unsafe fn from_raw<'a>(ptr: *mut bindings::sg_table) -> &'a Self { + // SAFETY: The safety requirements of this function guarantee that `ptr` is a valid pointer + // to a `struct sg_table` for the duration of `'a`. + unsafe { &*ptr.cast() } + } + + #[inline] + fn as_raw(&self) -> *mut bindings::sg_table { + self.inner.0.get() + } + + /// Returns an [`SGTableIter`] bound to the lifetime of `self`. + pub fn iter(&self) -> SGTableIter<'_> { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`. + let nents = unsafe { (*self.as_raw()).nents }; + + let pos = if nents > 0 { + // SAFETY: `self.as_raw()` is a valid pointer to a `struct sg_table`. + let ptr = unsafe { (*self.as_raw()).sgl }; + + // SAFETY: `ptr` is guaranteed to be a valid pointer to a `struct scatterlist`. + Some(unsafe { SGEntry::from_raw(ptr) }) + } else { + None + }; + + SGTableIter { pos, nents } + } +} + +/// Represents the DMA mapping state of a `struct sg_table`. +/// +/// This is used as an inner type of [`Owned`] to manage the DMA mapping lifecycle. +/// +/// # Invariants +/// +/// - `sgt` is a valid pointer to a `struct sg_table` for the entire lifetime of the +/// [`DmaMappedSgt`]. +/// - `sgt` is always DMA mapped. +struct DmaMappedSgt { + sgt: NonNull, + dev: ARef, + dir: dma::DataDirection, +} + +// SAFETY: `DmaMappedSgt` can be sent to any task. +unsafe impl Send for DmaMappedSgt {} + +// SAFETY: `DmaMappedSgt` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for DmaMappedSgt {} + +impl DmaMappedSgt { + /// # Safety + /// + /// - `sgt` must be a valid pointer to a `struct sg_table` for the entire lifetime of the + /// returned [`DmaMappedSgt`]. + /// - The caller must guarantee that `sgt` remains DMA mapped for the entire lifetime of + /// [`DmaMappedSgt`]. + unsafe fn new( + sgt: NonNull, + dev: &Device, + dir: dma::DataDirection, + ) -> Result { + // SAFETY: + // - `dev.as_raw()` is a valid pointer to a `struct device`, which is guaranteed to be + // bound to a driver for the duration of this call. + // - `sgt` is a valid pointer to a `struct sg_table`. + error::to_result(unsafe { + bindings::dma_map_sgtable(dev.as_raw(), sgt.as_ptr(), dir.into(), 0) + })?; + + // INVARIANT: By the safety requirements of this function it is guaranteed that `sgt` is + // valid for the entire lifetime of this object instance. + Ok(Self { + sgt, + dev: dev.into(), + dir, + }) + } +} + +impl Drop for DmaMappedSgt { + #[inline] + fn drop(&mut self) { + // SAFETY: + // - `self.dev.as_raw()` is a pointer to a valid `struct device`. + // - `self.dev` is the same device the mapping has been created for in `Self::new()`. + // - `self.sgt.as_ptr()` is a valid pointer to a `struct sg_table` by the type invariants + // of `Self`. + // - `self.dir` is the same `dma::DataDirection` the mapping has been created with in + // `Self::new()`. + unsafe { + bindings::dma_unmap_sgtable(self.dev.as_raw(), self.sgt.as_ptr(), self.dir.into(), 0) + }; + } +} + +/// A transparent wrapper around a `struct sg_table`. +/// +/// While we could also create the `struct sg_table` in the constructor of [`Owned`], we can't tear +/// down the `struct sg_table` in [`Owned::drop`]; the drop order in [`Owned`] matters. +#[repr(transparent)] +struct RawSGTable(Opaque); + +// SAFETY: `RawSGTable` can be sent to any task. +unsafe impl Send for RawSGTable {} + +// SAFETY: `RawSGTable` has no interior mutability and can be accessed concurrently. +unsafe impl Sync for RawSGTable {} + +impl RawSGTable { + /// # Safety + /// + /// - `pages` must be a slice of valid `struct page *`. + /// - The pages pointed to by `pages` must remain valid for the entire lifetime of the returned + /// [`RawSGTable`]. + unsafe fn new( + pages: &mut [*mut bindings::page], + size: usize, + max_segment: u32, + flags: alloc::Flags, + ) -> Result { + // `sg_alloc_table_from_pages_segment()` expects at least one page, otherwise it + // produces a NPE. + if pages.is_empty() { + return Err(EINVAL); + } + + let sgt = Opaque::zeroed(); + // SAFETY: + // - `sgt.get()` is a valid pointer to uninitialized memory. + // - As by the check above, `pages` is not empty. + error::to_result(unsafe { + bindings::sg_alloc_table_from_pages_segment( + sgt.get(), + pages.as_mut_ptr(), + pages.len().try_into()?, + 0, + size, + max_segment, + flags.as_raw(), + ) + })?; + + Ok(Self(sgt)) + } + + #[inline] + fn as_raw(&self) -> *mut bindings::sg_table { + self.0.get() + } +} + +impl Drop for RawSGTable { + #[inline] + fn drop(&mut self) { + // SAFETY: `sgt` is a valid and initialized `struct sg_table`. + unsafe { bindings::sg_free_table(self.0.get()) }; + } +} + +/// The [`Owned`] generic type of an [`SGTable`]. +/// +/// A [`SGTable`] signifies that the [`SGTable`] owns all associated resources: +/// +/// - The backing memory pages. +/// - The `struct sg_table` allocation (`sgt`). +/// - The DMA mapping, managed through a [`Devres`]-managed `DmaMappedSgt`. +/// +/// Users interact with this type through the [`SGTable`] handle and do not need to manage +/// [`Owned`] directly. +#[pin_data] +pub struct Owned

{ + // Note: The drop order is relevant; we first have to unmap the `struct sg_table`, then free the + // `struct sg_table` and finally free the backing pages. + #[pin] + dma: Devres, + sgt: RawSGTable, + _pages: P, +} + +// SAFETY: `Owned` can be sent to any task if `P` can be send to any task. +unsafe impl Send for Owned

{} + +// SAFETY: `Owned` has no interior mutability and can be accessed concurrently if `P` can be +// accessed concurrently. +unsafe impl Sync for Owned

{} + +impl

Owned

+where + for<'a> P: page::AsPageIter = VmallocPageIter<'a>> + 'static, +{ + fn new( + dev: &Device, + mut pages: P, + dir: dma::DataDirection, + flags: alloc::Flags, + ) -> Result + '_> { + let page_iter = pages.page_iter(); + let size = page_iter.size(); + + let mut page_vec: KVec<*mut bindings::page> = + KVec::with_capacity(page_iter.page_count(), flags)?; + + for page in page_iter { + page_vec.push(page.as_ptr(), flags)?; + } + + // `dma_max_mapping_size` returns `size_t`, but `sg_alloc_table_from_pages_segment()` takes + // an `unsigned int`. + // + // SAFETY: `dev.as_raw()` is a valid pointer to a `struct device`. + let max_segment = match unsafe { bindings::dma_max_mapping_size(dev.as_raw()) } { + 0 => u32::MAX, + max_segment => u32::try_from(max_segment).unwrap_or(u32::MAX), + }; + + Ok(try_pin_init!(&this in Self { + // SAFETY: + // - `page_vec` is a `KVec` of valid `struct page *` obtained from `pages`. + // - The pages contained in `pages` remain valid for the entire lifetime of the + // `RawSGTable`. + sgt: unsafe { RawSGTable::new(&mut page_vec, size, max_segment, flags) }?, + dma <- { + // SAFETY: `this` is a valid pointer to uninitialized memory. + let sgt = unsafe { &raw mut (*this.as_ptr()).sgt }.cast(); + + // SAFETY: `sgt` is guaranteed to be non-null. + let sgt = unsafe { NonNull::new_unchecked(sgt) }; + + // SAFETY: + // - It is guaranteed that the object returned by `DmaMappedSgt::new` won't out-live + // `sgt`. + // - `sgt` is never DMA unmapped manually. + Devres::new(dev, unsafe { DmaMappedSgt::new(sgt, dev, dir) }) + }, + _pages: pages, + })) + } +} + +impl

SGTable> +where + for<'a> P: page::AsPageIter = VmallocPageIter<'a>> + 'static, +{ + /// Allocates a new scatter-gather table from the given pages and maps it for DMA. + /// + /// This constructor creates a new [`SGTable`] that takes ownership of `P`. + /// It allocates a `struct sg_table`, populates it with entries corresponding to the physical + /// pages of `P`, and maps the table for DMA with the specified [`Device`] and + /// [`dma::DataDirection`]. + /// + /// The DMA mapping is managed through [`Devres`], ensuring that the DMA mapping is unmapped + /// once the associated [`Device`] is unbound, or when the [`SGTable`] is dropped. + /// + /// # Parameters + /// + /// * `dev`: The [`Device`] that will be performing the DMA. + /// * `pages`: The entity providing the backing pages. It must implement [`page::AsPageIter`]. + /// The ownership of this entity is moved into the new [`SGTable`]. + /// * `dir`: The [`dma::DataDirection`] of the DMA transfer. + /// * `flags`: Allocation flags for internal allocations (e.g., [`GFP_KERNEL`]). + /// + /// # Examples + /// + /// ``` + /// use kernel::{ + /// device::{Bound, Device}, + /// dma, page, + /// prelude::*, + /// scatterlist::{SGTable, Owned}, + /// }; + /// + /// fn test(dev: &Device) -> Result { + /// let size = 4 * page::PAGE_SIZE; + /// let pages = VVec::::with_capacity(size, GFP_KERNEL)?; + /// + /// let sgt = KBox::pin_init(SGTable::new( + /// dev, + /// pages, + /// dma::DataDirection::ToDevice, + /// GFP_KERNEL, + /// ), GFP_KERNEL)?; + /// + /// Ok(()) + /// } + /// ``` + pub fn new( + dev: &Device, + pages: P, + dir: dma::DataDirection, + flags: alloc::Flags, + ) -> impl PinInit + '_ { + try_pin_init!(Self { + inner <- Owned::new(dev, pages, dir, flags)? + }) + } +} + +impl

Deref for SGTable> { + type Target = SGTable; + + #[inline] + fn deref(&self) -> &Self::Target { + // SAFETY: + // - `self.inner.sgt.as_raw()` is a valid pointer to a `struct sg_table` for the entire + // lifetime of `self`. + // - The backing `struct sg_table` is not modified for the entire lifetime of `self`. + unsafe { SGTable::from_raw(self.inner.sgt.as_raw()) } + } +} + +mod private { + pub trait Sealed {} + + impl Sealed for super::Borrowed {} + impl

Sealed for super::Owned

{} +} + +/// An [`Iterator`] over the DMA mapped [`SGEntry`] items of an [`SGTable`]. +/// +/// Note that the existence of an [`SGTableIter`] does not guarantee that the [`SGEntry`] items +/// actually remain DMA mapped; they are prone to be unmapped on device unbind. +pub struct SGTableIter<'a> { + pos: Option<&'a SGEntry>, + /// The number of DMA mapped entries in a `struct sg_table`. + nents: c_uint, +} + +impl<'a> Iterator for SGTableIter<'a> { + type Item = &'a SGEntry; + + fn next(&mut self) -> Option { + let entry = self.pos?; + self.nents = self.nents.saturating_sub(1); + + // SAFETY: `entry.as_raw()` is a valid pointer to a `struct scatterlist`. + let next = unsafe { bindings::sg_next(entry.as_raw()) }; + + self.pos = (!next.is_null() && self.nents > 0).then(|| { + // SAFETY: If `next` is not NULL, `sg_next()` guarantees to return a valid pointer to + // the next `struct scatterlist`. + unsafe { SGEntry::from_raw(next) } + }); + + Some(entry) + } +} From 5444799d701cbf3c4b18c88fef8034b35d159615 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 28 Aug 2025 15:32:17 +0200 Subject: [PATCH 42/64] samples: rust: dma: add sample code for SGTable Add sample code for allocating and mapping a scatter-gather table (`SGTable`). Reviewed-by: Alexandre Courbot Reviewed-by: Daniel Almeida Reviewed-by: Lyude Paul Co-developed-by: Abdiel Janulgue Signed-off-by: Abdiel Janulgue Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250828133323.53311-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- samples/rust/rust_dma.rs | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index c5e7cce68654..04007e29fd85 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs @@ -7,15 +7,19 @@ use kernel::{ bindings, device::Core, - dma::{CoherentAllocation, Device, DmaMask}, - pci, + dma::{CoherentAllocation, DataDirection, Device, DmaMask}, + page, pci, prelude::*, + scatterlist::{Owned, SGTable}, types::ARef, }; +#[pin_data(PinnedDrop)] struct DmaSampleDriver { pdev: ARef, ca: CoherentAllocation, + #[pin] + sgt: SGTable>>, } const TEST_VALUES: [(u32, u32); 5] = [ @@ -70,21 +74,30 @@ fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result) { + let dev = self.pdev.as_ref(); + + dev_info!(dev, "Unload DMA test driver.\n"); for (i, value) in TEST_VALUES.into_iter().enumerate() { let val0 = kernel::dma_read!(self.ca[i].h); @@ -99,6 +112,10 @@ fn drop(&mut self) { assert_eq!(val1, value.1); } } + + for (i, entry) in self.sgt.iter().enumerate() { + dev_info!(dev, "Entry[{}]: DMA address: {:#x}", i, entry.dma_address()); + } } } From c58466b85b16f139a4afc6163aa57d7445dae806 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 28 Aug 2025 15:32:18 +0200 Subject: [PATCH 43/64] MAINTAINERS: rust: dma: add scatterlist files Rename the "DMA MAPPING HELPERS DEVICE DRIVER API [RUST]" maintainers entry to "DMA MAPPING & SCATTERLIST API [RUST]" and add the corresponding scatterlist files. Reviewed-by: Lyude Paul Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250828133323.53311-6-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 936650c7eff9..b97760467f09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7238,7 +7238,7 @@ F: include/linux/dma-mapping.h F: include/linux/swiotlb.h F: kernel/dma/ -DMA MAPPING HELPERS DEVICE DRIVER API [RUST] +DMA MAPPING & SCATTERLIST API [RUST] M: Abdiel Janulgue M: Danilo Krummrich R: Daniel Almeida @@ -7249,7 +7249,9 @@ S: Supported W: https://rust-for-linux.com T: git https://github.com/Rust-for-Linux/linux.git alloc-next F: rust/helpers/dma.c +F: rust/helpers/scatterlist.c F: rust/kernel/dma.rs +F: rust/kernel/scatterlist.rs F: samples/rust/rust_dma.rs DMA-BUF HEAPS FRAMEWORK From e2580413a83680f679904ad2f2c1aa6969876469 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 1 Sep 2025 17:01:53 +0200 Subject: [PATCH 44/64] gpu: nova-core: take advantage of pci::Device::unbind() Now that we have pci::Device::unbind() we can unregister the sysmem flush page with a direct access the I/O resource, i.e. without RCU read side critical section. Signed-off-by: Danilo Krummrich Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250901150207.63094-1-dakr@kernel.org Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/driver.rs | 4 ++++ drivers/gpu/nova-core/gpu.rs | 22 ++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 274989ea1fb4..02b3edd7bbdc 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -54,4 +54,8 @@ fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } } diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 4d0f759610ec..a0878ecdc18b 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -163,7 +163,7 @@ fn new(bar: &Bar0) -> Result { } /// Structure holding the resources required to operate the GPU. -#[pin_data(PinnedDrop)] +#[pin_data] pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 @@ -174,15 +174,6 @@ pub(crate) struct Gpu { sysmem_flush: SysmemFlush, } -#[pinned_drop] -impl PinnedDrop for Gpu { - fn drop(self: Pin<&mut Self>) { - // Unregister the sysmem flush page before we release it. - self.bar - .try_access_with(|b| self.sysmem_flush.unregister(b)); - } -} - impl Gpu { /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly /// created the WPR2 region. @@ -309,4 +300,15 @@ pub(crate) fn new( sysmem_flush, })) } + + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. + pub(crate) fn unbind(&self, dev: &device::Device) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); + } } From 6ea42e9146f7ab8e59ffea8aa3300ad6710399dd Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Sep 2025 14:46:36 -0400 Subject: [PATCH 45/64] rust: drm: gem: Simplify use of generics Now that my rust skills have been honed, I noticed that there's a lot of generics in our gem bindings that don't actually need to be here. Currently the hierarchy of traits in our gem bindings looks like this: * Drivers implement: * BaseDriverObject (has the callbacks) * DriverObject (has the drm::Driver type) * Crate implements: * IntoGEMObject for Object where T: DriverObject Handles conversion to/from raw object pointers * BaseObject for T where T: IntoGEMObject Provides methods common to all gem interfaces Also of note, this leaves us with two different drm::Driver associated types: * DriverObject::Driver * IntoGEMObject::Driver I'm not entirely sure of the original intent here unfortunately (if anyone is, please let me know!), but my guess is that the idea would be that some objects can implement IntoGEMObject using a different ::Driver than DriverObject - presumably to enable the usage of gem objects from different drivers. A reasonable usecase of course. However - if I'm not mistaken, I don't think that this is actually how things would go in practice. Driver implementations are of course implemented by their associated drivers, and generally drivers are not linked to each-other when building the kernel. Which is to say that even in a situation where we would theoretically deal with gem objects from another driver, we still wouldn't have access to its drm::driver::Driver implementation. It's more likely we would simply want a variant of gem objects in such a situation that have no association with a drm::driver::Driver type. Taking that into consideration, we can assume the following: * Anything that implements BaseDriverObject will implement DriverObject In other words, all BaseDriverObjects indirectly have an associated ::Driver type - so the two traits can be combined into one with no generics. * Not everything that implements IntoGEMObject will have an associated ::Driver, and that's OK. And with this, we now can do quite a bit of cleanup with the use of generics here. As such, this commit: * Removes the generics on BaseDriverObject * Moves DriverObject::Driver into BaseDriverObject * Removes DriverObject * Removes IntoGEMObject::Driver * Add AllocImpl::Driver, which we can use as a binding to figure out the correct File type for BaseObject Leaving us with a simpler trait hierarchy that now looks like this: * Drivers implement: BaseDriverObject * Crate implements: * IntoGEMObject for Object where T: DriverObject * BaseObject for T where T: IntoGEMObject Which makes the code a lot easier to understand and build on :). Signed-off-by: Lyude Paul Reviewed-by: Daniel Almeida Acked-by: Danilo Krummrich Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250908185239.135849-2-lyude@redhat.com Signed-off-by: Alice Ryhl --- drivers/gpu/drm/nova/gem.rs | 8 ++-- rust/kernel/drm/driver.rs | 3 ++ rust/kernel/drm/gem/mod.rs | 77 ++++++++++++++++--------------------- 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index cd82773dab92..2760ba4f3450 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -16,16 +16,14 @@ #[pin_data] pub(crate) struct NovaObject {} -impl gem::BaseDriverObject> for NovaObject { +impl gem::DriverObject for NovaObject { + type Driver = NovaDriver; + fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit { try_pin_init!(NovaObject {}) } } -impl gem::DriverObject for NovaObject { - type Driver = NovaDriver; -} - impl NovaObject { /// Create a new DRM GEM object. pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result>> { diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index fe7e8d06961a..dae0f4d1bbe3 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -86,6 +86,9 @@ pub struct AllocOps { /// Trait for memory manager implementations. Implemented internally. pub trait AllocImpl: super::private::Sealed + drm::gem::IntoGEMObject { + /// The [`Driver`] implementation for this [`AllocImpl`]. + type Driver: drm::Driver; + /// The C callback operations for this memory manager. const ALLOC_OPS: AllocOps; } diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index b71821cfb5ea..f5b19865d745 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -15,31 +15,31 @@ use core::{mem, ops::Deref, ptr::NonNull}; /// GEM object functions, which must be implemented by drivers. -pub trait BaseDriverObject: Sync + Send + Sized { +pub trait DriverObject: Sync + Send + Sized { + /// Parent `Driver` for this object. + type Driver: drm::Driver; + /// Create a new driver data object for a GEM object of a given size. - fn new(dev: &drm::Device, size: usize) -> impl PinInit; + fn new(dev: &drm::Device, size: usize) -> impl PinInit; /// Open a new handle to an existing object, associated with a File. fn open( - _obj: &<::Driver as drm::Driver>::Object, - _file: &drm::File<<::Driver as drm::Driver>::File>, + _obj: &::Object, + _file: &drm::File<::File>, ) -> Result { Ok(()) } /// Close a handle to an existing object, associated with a File. fn close( - _obj: &<::Driver as drm::Driver>::Object, - _file: &drm::File<<::Driver as drm::Driver>::File>, + _obj: &::Object, + _file: &drm::File<::File>, ) { } } /// Trait that represents a GEM object subtype pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted { - /// Owning driver for this type - type Driver: drm::Driver; - /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as /// this owning object is valid. fn as_raw(&self) -> *mut bindings::drm_gem_object; @@ -74,25 +74,15 @@ unsafe fn dec_ref(obj: NonNull) { } } -/// Trait which must be implemented by drivers using base GEM objects. -pub trait DriverObject: BaseDriverObject> { - /// Parent `Driver` for this object. - type Driver: drm::Driver; -} - -extern "C" fn open_callback, U: BaseObject>( +extern "C" fn open_callback( raw_obj: *mut bindings::drm_gem_object, raw_file: *mut bindings::drm_file, ) -> core::ffi::c_int { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { - drm::File::<<::Driver as drm::Driver>::File>::from_raw(raw_file) - }; - // SAFETY: `open_callback` is specified in the AllocOps structure for `Object`, ensuring that - // `raw_obj` is indeed contained within a `Object`. - let obj = unsafe { - <<::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) - }; + let file = unsafe { drm::File::<::File>::from_raw(raw_file) }; + // SAFETY: `open_callback` is specified in the AllocOps structure for `DriverObject`, + // ensuring that `raw_obj` is contained within a `DriverObject` + let obj = unsafe { <::Object as IntoGEMObject>::from_raw(raw_obj) }; match T::open(obj, file) { Err(e) => e.to_errno(), @@ -100,26 +90,21 @@ extern "C" fn open_callback, U: BaseObject>( } } -extern "C" fn close_callback, U: BaseObject>( +extern "C" fn close_callback( raw_obj: *mut bindings::drm_gem_object, raw_file: *mut bindings::drm_file, ) { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { - drm::File::<<::Driver as drm::Driver>::File>::from_raw(raw_file) - }; + let file = unsafe { drm::File::<::File>::from_raw(raw_file) }; + // SAFETY: `close_callback` is specified in the AllocOps structure for `Object`, ensuring // that `raw_obj` is indeed contained within a `Object`. - let obj = unsafe { - <<::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) - }; + let obj = unsafe { <::Object as IntoGEMObject>::from_raw(raw_obj) }; T::close(obj, file); } impl IntoGEMObject for Object { - type Driver = T::Driver; - fn as_raw(&self) -> *mut bindings::drm_gem_object { self.obj.get() } @@ -141,10 +126,12 @@ fn size(&self) -> usize { /// Creates a new handle for the object associated with a given `File` /// (or returns an existing one). - fn create_handle( - &self, - file: &drm::File<<::Driver as drm::Driver>::File>, - ) -> Result { + fn create_handle(&self, file: &drm::File) -> Result + where + Self: AllocImpl, + D: drm::Driver, + F: drm::file::DriverFile, + { let mut handle: u32 = 0; // SAFETY: The arguments are all valid per the type invariants. to_result(unsafe { @@ -154,10 +141,12 @@ fn create_handle( } /// Looks up an object by its handle for a given `File`. - fn lookup_handle( - file: &drm::File<<::Driver as drm::Driver>::File>, - handle: u32, - ) -> Result> { + fn lookup_handle(file: &drm::File, handle: u32) -> Result> + where + Self: AllocImpl, + D: drm::Driver, + F: drm::file::DriverFile, + { // SAFETY: The arguments are all valid per the type invariants. let ptr = unsafe { bindings::drm_gem_object_lookup(file.as_raw().cast(), handle) }; if ptr.is_null() { @@ -212,8 +201,8 @@ impl Object { const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { free: Some(Self::free_callback), - open: Some(open_callback::>), - close: Some(close_callback::>), + open: Some(open_callback::), + close: Some(close_callback::), print_info: None, export: None, pin: None, @@ -296,6 +285,8 @@ fn deref(&self) -> &Self::Target { } impl AllocImpl for Object { + type Driver = T::Driver; + const ALLOC_OPS: AllocOps = AllocOps { gem_create_object: None, prime_handle_to_fd: None, From 1ed10db60f47306c1fd395f87bf6d443926a9488 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Sep 2025 14:46:37 -0400 Subject: [PATCH 46/64] rust: drm: gem: Add DriverFile type alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just to reduce the clutter with the File<…> types in gem.rs. Signed-off-by: Lyude Paul Reviewed-by: Daniel Almeida Acked-by: Danilo Krummrich Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250908185239.135849-3-lyude@redhat.com Signed-off-by: Alice Ryhl --- rust/kernel/drm/gem/mod.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index f5b19865d745..ead723980b27 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -14,6 +14,13 @@ }; use core::{mem, ops::Deref, ptr::NonNull}; +/// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its +/// [`DriverObject`] implementation. +/// +/// [`Driver`]: drm::Driver +/// [`DriverFile`]: drm::file::DriverFile +pub type DriverFile = drm::File<<::Driver as drm::Driver>::File>; + /// GEM object functions, which must be implemented by drivers. pub trait DriverObject: Sync + Send + Sized { /// Parent `Driver` for this object. @@ -23,19 +30,12 @@ pub trait DriverObject: Sync + Send + Sized { fn new(dev: &drm::Device, size: usize) -> impl PinInit; /// Open a new handle to an existing object, associated with a File. - fn open( - _obj: &::Object, - _file: &drm::File<::File>, - ) -> Result { + fn open(_obj: &::Object, _file: &DriverFile) -> Result { Ok(()) } /// Close a handle to an existing object, associated with a File. - fn close( - _obj: &::Object, - _file: &drm::File<::File>, - ) { - } + fn close(_obj: &::Object, _file: &DriverFile) {} } /// Trait that represents a GEM object subtype @@ -79,7 +79,8 @@ extern "C" fn open_callback( raw_file: *mut bindings::drm_file, ) -> core::ffi::c_int { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { drm::File::<::File>::from_raw(raw_file) }; + let file = unsafe { DriverFile::::from_raw(raw_file) }; + // SAFETY: `open_callback` is specified in the AllocOps structure for `DriverObject`, // ensuring that `raw_obj` is contained within a `DriverObject` let obj = unsafe { <::Object as IntoGEMObject>::from_raw(raw_obj) }; @@ -95,7 +96,7 @@ extern "C" fn close_callback( raw_file: *mut bindings::drm_file, ) { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. - let file = unsafe { drm::File::<::File>::from_raw(raw_file) }; + let file = unsafe { DriverFile::::from_raw(raw_file) }; // SAFETY: `close_callback` is specified in the AllocOps structure for `Object`, ensuring // that `raw_obj` is indeed contained within a `Object`. From 6b35936f058d0cb9171c7be1424b62017b874913 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 8 Sep 2025 14:46:38 -0400 Subject: [PATCH 47/64] rust: drm: gem: Drop Object::SIZE Drive-by fix, it doesn't seem like anything actually uses this constant anymore. Signed-off-by: Lyude Paul Reviewed-by: Danilo Krummrich Reviewed-by: Daniel Almeida Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250908185239.135849-4-lyude@redhat.com Signed-off-by: Alice Ryhl --- rust/kernel/drm/gem/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index ead723980b27..fd872de3b669 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -12,7 +12,7 @@ prelude::*, types::{ARef, AlwaysRefCounted, Opaque}, }; -use core::{mem, ops::Deref, ptr::NonNull}; +use core::{ops::Deref, ptr::NonNull}; /// A type alias for retrieving a [`Driver`]s [`DriverFile`] implementation from its /// [`DriverObject`] implementation. @@ -197,9 +197,6 @@ pub struct Object { } impl Object { - /// The size of this object's structure. - pub const SIZE: usize = mem::size_of::(); - const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { free: Some(Self::free_callback), open: Some(open_callback::), From cf4fd52e323604ccfa8390917593e1fb965653ee Mon Sep 17 00:00:00 2001 From: Daniel Almeida Date: Wed, 10 Sep 2025 10:51:18 -0300 Subject: [PATCH 48/64] rust: drm: Introduce the Tyr driver for Arm Mali GPUs Add a Rust driver for ARM Mali CSF-based GPUs. It is a port of Panthor and therefore exposes Panthor's uAPI and name to userspace, and the product of a joint effort between Collabora, Arm and Google engineers. The aim is to incrementally develop Tyr with the abstractions that are currently available until it is consider to be in parity with Panthor feature-wise. The development of Tyr itself started in January, after a few failed attempts of converting Panthor piecewise through a mix of Rust and C code. There is a downstream branch that's much further ahead in terms of capabilities than this initial patch. The downstream code is capable of booting the MCU, doing sync VM_BINDS through the work-in-progress GPUVM abstraction and also doing (trivial) submits through Asahi's drm_scheduler and dma_fence abstractions. So basically, most of what one would expect a modern GPU driver to do, except for power management and some other very important adjacent pieces. It is not at the point where submits can correctly deal with dependencies, or at the point where it can rotate access to the GPU hardware fairly through a software scheduler, but that is simply a matter of writing more code. This first patch, however, only implements a subset of the current features available downstream, as the rest is not implementable without pulling in even more abstractions. In particular, a lot of things depend on properly mapping memory on a given VA range, which itself depends on the GPUVM abstraction that is currently work-in-progress. For this reason, we still cannot boot the MCU and thus, cannot do much for the moment. This constitutes a change in the overall strategy that we have been using to develop Tyr so far. By submitting small parts of the driver upstream iteratively, we aim to: a) evolve together with Nova and rvkms, hopefully reducing regressions due to upstream changes (that may break us because we were not there, in the first place) b) prove any work-in-progress abstractions by having them run on a real driver and hardware and, c) provide a reason to work on and review said abstractions by providing a user, which would be tyr itself. Despite its limited feature-set, we offer IGT tests. It is only tested on the rk3588, so any other SoC is probably not going to work at all for now. The skeleton is basically taken from Nova and also rust_platform_driver.rs. Lastly, the name "Tyr" is inspired by Norse mythology, reflecting ARM's tradition of naming their GPUs after Nordic mythological figures and places. Co-developed-by: Beata Michalska Signed-off-by: Beata Michalska Co-developed-by: Carsten Haitzler Signed-off-by: Carsten Haitzler Co-developed-by: Rob Herring Signed-off-by: Rob Herring Link: https://www.collabora.com/news-and-blog/news-and-events/introducing-tyr-a-new-rust-drm-driver.html Signed-off-by: Daniel Almeida Acked-by: Boris Brezillon [aliceryhl: minor Kconfig update on apply] [aliceryhl: s/drm::device::/drm::/] Link: https://lore.kernel.org/r/20250910-tyr-v3-1-dba3bc2ae623@collabora.com Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl --- MAINTAINERS | 13 ++ drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/tyr/Kconfig | 19 +++ drivers/gpu/drm/tyr/Makefile | 3 + drivers/gpu/drm/tyr/driver.rs | 205 +++++++++++++++++++++++++++++++ drivers/gpu/drm/tyr/file.rs | 56 +++++++++ drivers/gpu/drm/tyr/gem.rs | 18 +++ drivers/gpu/drm/tyr/gpu.rs | 219 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/tyr/regs.rs | 108 +++++++++++++++++ drivers/gpu/drm/tyr/tyr.rs | 22 ++++ rust/uapi/uapi_helper.h | 1 + 12 files changed, 667 insertions(+) create mode 100644 drivers/gpu/drm/tyr/Kconfig create mode 100644 drivers/gpu/drm/tyr/Makefile create mode 100644 drivers/gpu/drm/tyr/driver.rs create mode 100644 drivers/gpu/drm/tyr/file.rs create mode 100644 drivers/gpu/drm/tyr/gem.rs create mode 100644 drivers/gpu/drm/tyr/gpu.rs create mode 100644 drivers/gpu/drm/tyr/regs.rs create mode 100644 drivers/gpu/drm/tyr/tyr.rs diff --git a/MAINTAINERS b/MAINTAINERS index 62298a601bbe..8a11e6c5dd80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2086,6 +2086,19 @@ F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml F: drivers/gpu/drm/panthor/ F: include/uapi/drm/panthor_drm.h +ARM MALI TYR DRM DRIVER +M: Daniel Almeida +M: Alice Ryhl +L: dri-devel@lists.freedesktop.org +S: Supported +W: https://rust-for-linux.com/tyr-gpu-driver +W https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html +B: https://gitlab.freedesktop.org/panfrost/linux/-/issues +T: git https://gitlab.freedesktop.org/drm/rust/kernel.git +F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml +F: drivers/gpu/drm/tyr/ +F: include/uapi/drm/panthor_drm.h + ARM MALI-DP DRM DRIVER M: Liviu Dudau S: Supported diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f7ea8e895c0c..fda170730468 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -396,6 +396,8 @@ source "drivers/gpu/drm/sprd/Kconfig" source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" depends on DRM && PCI && HYPERV diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4dafbdc8f86a..4b2f7d794275 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -220,6 +220,7 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ obj-$(CONFIG_DRM_PANFROST) += panfrost/ obj-$(CONFIG_DRM_PANTHOR) += panthor/ +obj-$(CONFIG_DRM_TYR) += tyr/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ obj-$(CONFIG_DRM_MCDE) += mcde/ obj-$(CONFIG_DRM_TIDSS) += tidss/ diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig new file mode 100644 index 000000000000..4b55308fd2eb --- /dev/null +++ b/drivers/gpu/drm/tyr/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config DRM_TYR + tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)" + depends on DRM=y + depends on RUST + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + default n + help + Rust DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. + + if M is selected, the module will be called tyr. This driver is work + in progress and may not be functional. diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 000000000000..ba545f65f2c0 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 000000000000..d5625dd1e41c --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::c_str; +use kernel::clk::Clk; +use kernel::clk::OptionalClk; +use kernel::device::Bound; +use kernel::device::Core; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::ioctl; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator; +use kernel::regulator::Regulator; +use kernel::sizes::SZ_2M; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::gem::TyrObject; +use crate::gpu; +use crate::gpu::GpuInfo; +use crate::regs; + +pub(crate) type IoMem = kernel::io::mem::IoMem; + +/// Convenience type alias for the DRM device type for this driver. +pub(crate) type TyrDevice = drm::Device; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef, +} + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrData { + pub(crate) pdev: ARef, + + #[pin] + clks: Mutex, + + #[pin] + regulators: Mutex, + + /// Some information on the GPU. + /// + /// This is mainly queried by userspace, i.e.: Mesa. + pub(crate) gpu_info: GpuInfo, +} + +// Both `Clk` and `Regulator` do not implement `Send` or `Sync`, but they +// should. There are patches on the mailing list to address this, but they have +// not landed yet. +// +// For now, add this workaround so that this patch compiles with the promise +// that it will be removed in a future patch. +// +// SAFETY: This will be removed in a future patch. +unsafe impl Send for TyrData {} +// SAFETY: This will be removed in a future patch. +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(dev: &Device, iomem: &Devres) -> Result { + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED == 0 { + dev_err!(dev, "GPU reset failed with errno\n"); + dev_err!( + dev, + "GPU_INT_RAWSTAT is {}\n", + regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? + ); + + return Err(EIO); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + ::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device, + _info: Option<&Self::IdInfo>, + ) -> Result>> { + let core_clk = Clk::get(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::::get(pdev.as_ref(), c_str!("sram"))?; + + let request = pdev.io_request_by_index(0).ok_or(ENODEV)?; + let iomem = Arc::pin_init(request.iomap_sized::(), GFP_KERNEL)?; + + issue_soft_reset(pdev.as_ref(), &iomem)?; + gpu::l2_power_on(pdev.as_ref(), &iomem)?; + + let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?; + gpu_info.log(pdev); + + let platform: ARef = pdev.into(); + + let data = try_pin_init!(TyrData { + pdev: platform.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + }); + + let tdev: ARef = drm::Device::new(pdev.as_ref(), data)?; + drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; + + let driver = KBox::pin_init(try_pin_init!(TyrDriver { device: tdev }), GFP_KERNEL)?; + + // We need this to be dev_info!() because dev_dbg!() does not work at + // all in Rust for now, and we need to see whether probe succeeded. + dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) {} +} + +#[pinned_drop] +impl PinnedDrop for TyrData { + fn drop(self: Pin<&mut Self>) { + // TODO: the type-state pattern for Clks will fix this. + let clks = self.clks.lock(); + clks.core.disable_unprepare(); + clks.stacks.disable_unprepare(); + clks.coregroup.disable_unprepare(); + } +} + +// We need to retain the name "panthor" to achieve drop-in compatibility with +// the C driver in the userspace stack. +const INFO: drm::DriverInfo = drm::DriverInfo { + major: 1, + minor: 5, + patchlevel: 0, + name: c_str!("panthor"), + desc: c_str!("ARM Mali Tyr DRM driver"), +}; + +#[vtable] +impl drm::Driver for TyrDriver { + type Data = TyrData; + type File = File; + type Object = drm::gem::Object; + + const INFO: drm::DriverInfo = INFO; + + kernel::declare_drm_ioctls! { + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: OptionalClk, + coregroup: OptionalClk, +} + +#[pin_data] +struct Regulators { + mali: Regulator, + sram: Regulator, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 000000000000..0ef432947b73 --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::drm; +use kernel::prelude::*; +use kernel::uaccess::UserSlice; +use kernel::uapi; + +use crate::driver::TyrDevice; +use crate::TyrDriver; + +#[pin_data] +pub(crate) struct File {} + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(_dev: &drm::Device) -> Result>> { + KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) + } +} + +impl File { + pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = UserSlice::new( + UserPtr::from_addr(devquery.pointer as usize), + devquery.size as usize, + ) + .writer(); + + writer.write(&tdev.gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 000000000000..1273bf89dbd5 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use kernel::drm::gem; +use kernel::prelude::*; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct TyrObject {} + +impl gem::DriverObject for TyrObject { + type Driver = TyrDriver; + + fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit { + try_pin_init!(TyrObject {}) + } +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 000000000000..6c582910dd5d --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits::genmask_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::platform; +use kernel::prelude::*; +use kernel::time; +use kernel::transmute::AsBytes; + +use crate::driver::IoMem; +use crate::regs; + +/// Struct containing information that can be queried by userspace. This is read from +/// the GPU's registers. +/// +/// # Invariants +/// +/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`. +#[repr(C)] +pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) csf_id: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) pad0: u32, + pub(crate) shader_present: u64, + pub(crate) l2_present: u64, + pub(crate) tiler_present: u64, + pub(crate) core_features: u32, + pub(crate) pad: u32, +} + +impl GpuInfo { + pub(crate) fn new(dev: &Device, iomem: &Devres) -> Result { + let gpu_id = regs::GPU_ID.read(dev, iomem)?; + let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?; + let gpu_rev = regs::GPU_REVID.read(dev, iomem)?; + let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?; + let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?; + let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?; + let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?; + let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?; + let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?; + let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?; + let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?; + let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?; + let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?; + + let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?; + + let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?; + + let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?); + let shader_present = + shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32; + + let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?); + let tiler_present = + tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32; + + let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?); + let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32; + + Ok(Self { + gpu_id, + gpu_rev, + csf_id, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + // TODO: Add texture_features_{1,2,3}. + texture_features: [texture_features, 0, 0, 0], + as_present, + pad0: 0, + shader_present, + l2_present, + tiler_present, + core_features, + pad: 0, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + /// Returns the number of virtual address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & genmask_u32(0..=7) + } + + /// Returns the number of physical address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & genmask_u32(0..=7) + } +} + +// SAFETY: `GpuInfo`'s invariant guarantees that it is the same type that is +// already exposed to userspace by the C driver. This implies that it fulfills +// the requirements for `AsBytes`. +// +// This means: +// +// - No implicit padding, +// - No kernel pointers, +// - No interior mutability. +unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +#[allow(dead_code)] +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(28..=31)) >> 28, + arch_minor: (value & genmask_u32(24..=27)) >> 24, + arch_rev: (value & genmask_u32(20..=23)) >> 20, + prod_major: (value & genmask_u32(16..=19)) >> 16, + ver_major: (value & genmask_u32(12..=15)) >> 12, + ver_minor: (value & genmask_u32(4..=11)) >> 4, + ver_status: value & genmask_u32(0..=3), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(dev: &Device, iomem: &Devres) -> Result { + regs::L2_PWRON_LO.write(dev, iomem, 1)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::L2_READY_LO.read(dev, iomem)? != 1 { + dev_err!(dev, "Failed to power on the GPU\n"); + return Err(EIO); + } + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 000000000000..f46933aaa221 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +// We don't expect that all the registers and fields will be used, even in the +// future. +// +// Nevertheless, it is useful to have most of them defined, like the C driver +// does. +#![allow(dead_code)] + +use kernel::bits::bit_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::prelude::*; + +use crate::driver::IoMem; + +/// Represents a register in the Register Set +/// +/// TODO: Replace this with the Nova `register!()` macro when it is available. +/// In particular, this will automatically give us 64bit register reads and +/// writes. +pub(crate) struct Register; + +impl Register { + #[inline] + pub(crate) fn read(&self, dev: &Device, iomem: &Devres) -> Result { + let value = (*iomem).access(dev)?.read32(OFFSET); + Ok(value) + } + + #[inline] + pub(crate) fn write(&self, dev: &Device, iomem: &Devres, value: u32) -> Result { + (*iomem).access(dev)?.write32(value, OFFSET); + Ok(()) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register; +pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8); +pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8); +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; +pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register; +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; +pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register; +pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register; + +pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register; +pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 000000000000..861d1db43072 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Arm Mali Tyr DRM driver. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of +//! naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod gem; +mod gpu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + authors: ["The Tyr driver authors"], + description: "Arm Mali Tyr DRM driver", + license: "Dual MIT/GPL", +} diff --git a/rust/uapi/uapi_helper.h b/rust/uapi/uapi_helper.h index 1409441359f5..d4a239cf2a64 100644 --- a/rust/uapi/uapi_helper.h +++ b/rust/uapi/uapi_helper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 3d536277449e4de4609d4f0b8302ae7fd00b04b2 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 18 Aug 2025 15:18:44 +0200 Subject: [PATCH 49/64] rust: pin-init: examples: error: use `Error` in `fn main()` When running this example with no cargo features enabled, the compiler warns on 1.89: error: struct `Error` is never constructed --> examples/error.rs:11:12 | 11 | pub struct Error; | ^^^^^ | = note: `-D dead-code` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(dead_code)]` Thus use the error in the main function to avoid this warning. Signed-off-by: Benno Lossin --- rust/pin-init/examples/error.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/pin-init/examples/error.rs b/rust/pin-init/examples/error.rs index e0cc258746ce..8f4e135eb8ba 100644 --- a/rust/pin-init/examples/error.rs +++ b/rust/pin-init/examples/error.rs @@ -24,4 +24,6 @@ fn from(_: AllocError) -> Self { } #[allow(dead_code)] -fn main() {} +fn main() { + let _ = Error; +} From 62a9c709612c1cf0d5684022499f34c56c99b662 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Tue, 19 Aug 2025 09:22:00 +0200 Subject: [PATCH 50/64] rust: pin-init: README: add information banner on the rename to `pin-init` Signed-off-by: Benno Lossin --- rust/pin-init/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rust/pin-init/README.md b/rust/pin-init/README.md index a4c01a8d78b2..723e275445d4 100644 --- a/rust/pin-init/README.md +++ b/rust/pin-init/README.md @@ -6,6 +6,18 @@ ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/Rust-for-Linux/pin-init/test.yml) # `pin-init` +> [!NOTE] +> +> This crate was originally named [`pinned-init`], but the migration to +> `pin-init` is not yet complete. The `legcay` branch contains the current +> version of the `pinned-init` crate & the `main` branch already incorporates +> the rename to `pin-init`. +> +> There are still some changes needed on the kernel side before the migration +> can be completed. + +[`pinned-init`]: https://crates.io/crates/pinned-init + Library to safely and fallibly initialize pinned `struct`s using in-place constructors. From d49c56368c0cc98cdf02ae21dd275eba92f1c333 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 5 Sep 2025 19:12:06 +0200 Subject: [PATCH 51/64] rust: pin-init: rename `project` -> `project_this` in doctest The next commit makes the `#[pin_data]` attribute generate a `project` function that would collide with any existing ones. Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 62e013a5cc20..2d0d9fd12524 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -994,7 +994,7 @@ macro_rules! try_init { /// } /// /// impl Foo { -/// fn project(self: Pin<&mut Self>) -> Pin<&mut T> { +/// fn project_this(self: Pin<&mut Self>) -> Pin<&mut T> { /// assert_pinned!(Foo, elem, T, inline); /// /// // SAFETY: The field is structurally pinned. From 619db96daf942dad974c6c8157ed06d52f7bb969 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 5 Sep 2025 19:12:07 +0200 Subject: [PATCH 52/64] rust: pin-init: add pin projections to `#[pin_data]` Make the `#[pin_data]` macro generate a `*Projection` struct that holds either `Pin<&mut Field>` or `&mut Field` for every field of the original struct. Which version is chosen depends on weather there is a `#[pin]` or not respectively. Access to this projected version is enabled through generating `fn project(self: Pin<&mut Self>) -> SelfProjection<'_>`. [ Adapt workqueue to use the new projection instead of its own, custom one - Benno ] Reviewed-by: Gary Guo Reviewed-by: Boqun Feng Signed-off-by: Benno Lossin --- rust/kernel/workqueue.rs | 9 +----- rust/pin-init/src/macros.rs | 61 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index b9343d5bc00f..706e833e9702 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -356,18 +356,11 @@ struct ClosureWork { func: Option, } -impl ClosureWork { - fn project(self: Pin<&mut Self>) -> &mut Option { - // SAFETY: The `func` field is not structurally pinned. - unsafe { &mut self.get_unchecked_mut().func } - } -} - impl WorkItem for ClosureWork { type Pointer = Pin>; fn run(mut this: Pin>) { - if let Some(func) = this.as_mut().project().take() { + if let Some(func) = this.as_mut().project().func.take() { (func)() } } diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index 9ced630737b8..668dcee9b7af 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -831,6 +831,17 @@ macro_rules! __pin_data { $($fields)* } + $crate::__pin_data!(make_pin_projections: + @vis($vis), + @name($name), + @impl_generics($($impl_generics)*), + @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), + @where($($whr)*), + @pinned($($pinned)*), + @not_pinned($($not_pinned)*), + ); + // We put the rest into this const item, because it then will not be accessible to anything // outside. const _: () = { @@ -980,6 +991,56 @@ fn drop(&mut self) { stringify!($($rest)*), ); }; + (make_pin_projections: + @vis($vis:vis), + @name($name:ident), + @impl_generics($($impl_generics:tt)*), + @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), + @where($($whr:tt)*), + @pinned($($(#[$($p_attr:tt)*])* $pvis:vis $p_field:ident : $p_type:ty),* $(,)?), + @not_pinned($($(#[$($attr:tt)*])* $fvis:vis $field:ident : $type:ty),* $(,)?), + ) => { + $crate::macros::paste! { + #[doc(hidden)] + $vis struct [< $name Projection >] <'__pin, $($decl_generics)*> { + $($(#[$($p_attr)*])* $pvis $p_field : ::core::pin::Pin<&'__pin mut $p_type>,)* + $($(#[$($attr)*])* $fvis $field : &'__pin mut $type,)* + ___pin_phantom_data: ::core::marker::PhantomData<&'__pin mut ()>, + } + + impl<$($impl_generics)*> $name<$($ty_generics)*> + where $($whr)* + { + /// Pin-projects all fields of `Self`. + /// + /// These fields are structurally pinned: + $(#[doc = ::core::concat!(" - `", ::core::stringify!($p_field), "`")])* + /// + /// These fields are **not** structurally pinned: + $(#[doc = ::core::concat!(" - `", ::core::stringify!($field), "`")])* + #[inline] + $vis fn project<'__pin>( + self: ::core::pin::Pin<&'__pin mut Self>, + ) -> [< $name Projection >] <'__pin, $($ty_generics)*> { + // SAFETY: we only give access to `&mut` for fields not structurally pinned. + let this = unsafe { ::core::pin::Pin::get_unchecked_mut(self) }; + [< $name Projection >] { + $( + // SAFETY: `$p_field` is structurally pinned. + $(#[$($p_attr)*])* + $p_field : unsafe { ::core::pin::Pin::new_unchecked(&mut this.$p_field) }, + )* + $( + $(#[$($attr)*])* + $field : &mut this.$field, + )* + ___pin_phantom_data: ::core::marker::PhantomData, + } + } + } + } + }; (make_pin_data: @pin_data($pin_data:ident), @impl_generics($($impl_generics:tt)*), From 1fa516794fdd27b96cee77f8b12ac916b8b6a9a7 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 5 Sep 2025 16:05:31 +0200 Subject: [PATCH 53/64] rust: pin-init: add code blocks to `[try_][pin_]init!` macros Allow writing `_: { /* any number of statements */ }` in initializers to run arbitrary code during initialization. try_init!(MyStruct { _: { if check_something() { return Err(MyError); } }, foo: Foo::new(val), _: { println!("successfully initialized `MyStruct`"); }, }) Tested-by: Alexandre Courbot Reviewed-by: Gary Guo Reviewed-by: Alice Ryhl Tested-by: Danilo Krummrich Reviewed-by: Danilo Krummrich Signed-off-by: Benno Lossin --- rust/pin-init/src/lib.rs | 2 ++ rust/pin-init/src/macros.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 2d0d9fd12524..dd553212836e 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -740,6 +740,8 @@ macro_rules! stack_try_pin_init { /// As already mentioned in the examples above, inside of `pin_init!` a `struct` initializer with /// the following modifications is expected: /// - Fields that you want to initialize in-place have to use `<-` instead of `:`. +/// - You can use `_: { /* run any user-code here */ },` anywhere where you can place fields in +/// order to run arbitrary code. /// - In front of the initializer you can write `&this in` to have access to a [`NonNull`] /// pointer named `this` inside of the initializer. /// - Using struct update syntax one can place `..Zeroable::init_zeroed()` at the very end of the diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index 668dcee9b7af..c3462080b7b6 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -1263,6 +1263,21 @@ fn assert_zeroable(_: *mut T) {} // have been initialized. Therefore we can now dismiss the guards by forgetting them. $(::core::mem::forget($guards);)* }; + (init_slot($($use_data:ident)?): + @data($data:ident), + @slot($slot:ident), + @guards($($guards:ident,)*), + // arbitrary code block + @munch_fields(_: { $($code:tt)* }, $($rest:tt)*), + ) => { + { $($code)* } + $crate::__init_internal!(init_slot($($use_data)?): + @data($data), + @slot($slot), + @guards($($guards,)*), + @munch_fields($($rest)*), + ); + }; (init_slot($use_data:ident): // `use_data` is present, so we use the `data` to init fields. @data($data:ident), @slot($slot:ident), @@ -1358,6 +1373,20 @@ fn assert_zeroable(_: *mut T) {} ); } }; + (make_initializer: + @slot($slot:ident), + @type_name($t:path), + @munch_fields(_: { $($code:tt)* }, $($rest:tt)*), + @acc($($acc:tt)*), + ) => { + // code blocks are ignored for the initializer check + $crate::__init_internal!(make_initializer: + @slot($slot), + @type_name($t), + @munch_fields($($rest)*), + @acc($($acc)*), + ); + }; (make_initializer: @slot($slot:ident), @type_name($t:path), From 42415d163e5df6db799c7de6262d707e402c2c7e Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Fri, 5 Sep 2025 16:00:46 +0200 Subject: [PATCH 54/64] rust: pin-init: add references to previously initialized fields After initializing a field in an initializer macro, create a variable holding a reference that points at that field. The type is either `Pin<&mut T>` or `&mut T` depending on the field's structural pinning kind. [ Applied fixes to devres and rust_driver_pci sample - Benno] Reviewed-by: Danilo Krummrich Signed-off-by: Benno Lossin --- rust/kernel/devres.rs | 6 +- rust/pin-init/src/macros.rs | 149 ++++++++++++++++++++++++-------- samples/rust/rust_driver_pci.rs | 2 +- 3 files changed, 118 insertions(+), 39 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index da18091143a6..91dbf3f4b166 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -134,11 +134,9 @@ pub fn new<'a, E>( T: 'a, Error: From, { - let callback = Self::devres_callback; - try_pin_init!(&this in Self { dev: dev.into(), - callback, + callback: Self::devres_callback, // INVARIANT: `inner` is properly initialized. inner <- { // SAFETY: `this` is a valid pointer to uninitialized memory. @@ -151,7 +149,7 @@ pub fn new<'a, E>( // properly initialized, because we require `dev` (i.e. the *bound* device) to // live at least as long as the returned `impl PinInit`. to_result(unsafe { - bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) + bindings::devm_add_action(dev.as_raw(), Some(*callback), inner.cast()) })?; Opaque::pin_init(try_pin_init!(Inner { diff --git a/rust/pin-init/src/macros.rs b/rust/pin-init/src/macros.rs index c3462080b7b6..d6acf2cd291e 100644 --- a/rust/pin-init/src/macros.rs +++ b/rust/pin-init/src/macros.rs @@ -1049,38 +1049,56 @@ impl<$($impl_generics)*> $name<$($ty_generics)*> @pinned($($(#[$($p_attr:tt)*])* $pvis:vis $p_field:ident : $p_type:ty),* $(,)?), @not_pinned($($(#[$($attr:tt)*])* $fvis:vis $field:ident : $type:ty),* $(,)?), ) => { - // For every field, we create a projection function according to its projection type. If a - // field is structurally pinned, then it must be initialized via `PinInit`, if it is not - // structurally pinned, then it can be initialized via `Init`. - // - // The functions are `unsafe` to prevent accidentally calling them. - #[allow(dead_code)] - #[expect(clippy::missing_safety_doc)] - impl<$($impl_generics)*> $pin_data<$($ty_generics)*> - where $($whr)* - { - $( - $(#[$($p_attr)*])* - $pvis unsafe fn $p_field( - self, - slot: *mut $p_type, - init: impl $crate::PinInit<$p_type, E>, - ) -> ::core::result::Result<(), E> { - // SAFETY: TODO. - unsafe { $crate::PinInit::__pinned_init(init, slot) } - } - )* - $( - $(#[$($attr)*])* - $fvis unsafe fn $field( - self, - slot: *mut $type, - init: impl $crate::Init<$type, E>, - ) -> ::core::result::Result<(), E> { - // SAFETY: TODO. - unsafe { $crate::Init::__init(init, slot) } - } - )* + $crate::macros::paste! { + // For every field, we create a projection function according to its projection type. If a + // field is structurally pinned, then it must be initialized via `PinInit`, if it is not + // structurally pinned, then it can be initialized via `Init`. + // + // The functions are `unsafe` to prevent accidentally calling them. + #[allow(dead_code)] + #[expect(clippy::missing_safety_doc)] + impl<$($impl_generics)*> $pin_data<$($ty_generics)*> + where $($whr)* + { + $( + $(#[$($p_attr)*])* + $pvis unsafe fn $p_field( + self, + slot: *mut $p_type, + init: impl $crate::PinInit<$p_type, E>, + ) -> ::core::result::Result<(), E> { + // SAFETY: TODO. + unsafe { $crate::PinInit::__pinned_init(init, slot) } + } + + $(#[$($p_attr)*])* + $pvis unsafe fn [<__project_ $p_field>]<'__slot>( + self, + slot: &'__slot mut $p_type, + ) -> ::core::pin::Pin<&'__slot mut $p_type> { + ::core::pin::Pin::new_unchecked(slot) + } + )* + $( + $(#[$($attr)*])* + $fvis unsafe fn $field( + self, + slot: *mut $type, + init: impl $crate::Init<$type, E>, + ) -> ::core::result::Result<(), E> { + // SAFETY: TODO. + unsafe { $crate::Init::__init(init, slot) } + } + + $(#[$($attr)*])* + $fvis unsafe fn [<__project_ $field>]<'__slot>( + self, + slot: &'__slot mut $type, + ) -> &'__slot mut $type { + slot + } + )* + } } }; } @@ -1292,6 +1310,13 @@ fn assert_zeroable(_: *mut T) {} // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; + // SAFETY: + // - the project function does the correct field projection, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1323,6 +1348,14 @@ fn assert_zeroable(_: *mut T) {} // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. unsafe { $crate::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; + + // SAFETY: + // - the field is not structurally pinned, since the line above must compile, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = unsafe { &mut (*$slot).$field }; + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1341,7 +1374,7 @@ fn assert_zeroable(_: *mut T) {} ); } }; - (init_slot($($use_data:ident)?): + (init_slot(): // No `use_data`, so all fields are not structurally pinned @data($data:ident), @slot($slot:ident), @guards($($guards:ident,)*), @@ -1355,6 +1388,15 @@ fn assert_zeroable(_: *mut T) {} // SAFETY: The memory at `slot` is uninitialized. unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; } + + #[allow(unused_variables)] + // SAFETY: + // - the field is not structurally pinned, since no `use_data` was required to create this + // initializer, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + let $field = unsafe { &mut (*$slot).$field }; + // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1365,7 +1407,46 @@ fn assert_zeroable(_: *mut T) {} $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) }; - $crate::__init_internal!(init_slot($($use_data)?): + $crate::__init_internal!(init_slot(): + @data($data), + @slot($slot), + @guards([< __ $field _guard >], $($guards,)*), + @munch_fields($($rest)*), + ); + } + }; + (init_slot($use_data:ident): + @data($data:ident), + @slot($slot:ident), + @guards($($guards:ident,)*), + // Init by-value. + @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*), + ) => { + { + $(let $field = $val;)? + // Initialize the field. + // + // SAFETY: The memory at `slot` is uninitialized. + unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; + } + // SAFETY: + // - the project function does the correct field projection, + // - the field has been initialized, + // - the reference is only valid until the end of the initializer. + #[allow(unused_variables)] + let $field = $crate::macros::paste!(unsafe { $data.[< __project_ $field >](&mut (*$slot).$field) }); + + // Create the drop guard: + // + // We rely on macro hygiene to make it impossible for users to access this local variable. + // We use `paste!` to create new hygiene for `$field`. + $crate::macros::paste! { + // SAFETY: We forget the guard later when initialization has succeeded. + let [< __ $field _guard >] = unsafe { + $crate::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + }; + + $crate::__init_internal!(init_slot($use_data): @data($data), @slot($slot), @guards([< __ $field _guard >], $($guards,)*), diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index 606946ff4d7f..1ac0b06fa3b3 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -78,8 +78,8 @@ fn probe(pdev: &pci::Device, info: &Self::IdInfo) -> Result let drvdata = KBox::pin_init( try_pin_init!(Self { - pdev: pdev.into(), bar <- pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci")), + pdev: pdev.into(), index: *info, }), GFP_KERNEL, From f0fbbff7e3082b0e1065aef83cc1418ed2cb1b69 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:14 +0900 Subject: [PATCH 55/64] gpu: nova-core: require `Send` on `FalconEngine` and `FalconHal` We want to store the GSP and SEC2 falcon instances inside the `Gpu` structure, but doing so require these types to implement `Send` for `pci::Driver` to remain implementable on `NovaCore`, which embeds `Gpu`. All implementors of `FalconEngine` and `FalconHal` satisfy the requirements of `Send`, and these traits also already required `Sync`, so this a minor tweak. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-1-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/falcon.rs | 2 +- drivers/gpu/nova-core/falcon/hal.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 2dbcdf26697b..b16207e7242f 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -286,7 +286,7 @@ fn from(value: bool) -> Self { /// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used /// to identify a given Falcon instance with register I/O methods. pub(crate) trait FalconEngine: - Sync + RegisterBase + RegisterBase + Sized + Send + Sync + RegisterBase + RegisterBase + Sized { /// Singleton of the engine, used to identify it with register I/O methods. const ID: Self; diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index b233bc365882..bba288455617 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -13,7 +13,7 @@ /// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] /// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing /// registers. -pub(crate) trait FalconHal: Sync { +pub(crate) trait FalconHal: Send + Sync { /// Activates the Falcon core if the engine is a risvc/falcon dual engine. fn select_core(&self, _falcon: &Falcon, _bar: &Bar0) -> Result { Ok(()) From e7c96980ea4d93e79b43b07c5489d700207b0055 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:15 +0900 Subject: [PATCH 56/64] gpu: nova-core: move GSP boot code to its own module Right now the GSP boot code is very incomplete and limited to running FRTS, so having it in `Gpu::new` is not a big constraint. However, this will change as we add more steps of the GSP boot process, and not all GPU families follow the same procedure, so having these steps in a dedicated method is the logical construct. There is also the fact the GSP will require its own runtime data, and while it won't immediately need to be pinned, we want to be ready for the time where it will - most likely when it starts using mutexes. Thus, add an empty `Gsp` type that is pinned inside `Gpu` and initialized using a pin initializer. This sets the constraint we need to observe from the start, and could spare us some costly refactoring down the road. Then, move the code related to GSP boot to the `gsp::boot` module, as part of the `Gsp` implementation. Doing so allows us to make `Gpu::new` return a fallible `impl PinInit` instead of a `Result.` This is more idiomatic when working with pinned objects, and sets up the pinned initialization pattern we want to preserve as the code grows more complex. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-2-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/driver.rs | 9 +- drivers/gpu/nova-core/gpu.rs | 159 ++++++++--------------------- drivers/gpu/nova-core/gsp.rs | 17 +++ drivers/gpu/nova-core/gsp/boot.rs | 118 +++++++++++++++++++++ drivers/gpu/nova-core/nova_core.rs | 1 + 5 files changed, 184 insertions(+), 120 deletions(-) create mode 100644 drivers/gpu/nova-core/gsp.rs create mode 100644 drivers/gpu/nova-core/gsp/boot.rs diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 02b3edd7bbdc..1380b47617f7 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -34,14 +34,19 @@ fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result(0, c_str!("nova-core/bar0")), GFP_KERNEL, )?; + // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of + // `devres_bar`. + let bar_clone = Arc::clone(&devres_bar); + let bar = bar_clone.access(pdev.as_ref())?; + let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, devres_bar, bar), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index a0878ecdc18b..3c019df01d30 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -3,15 +3,13 @@ use kernel::{device, devres::Devres, error::code::*, pci, prelude::*, sync::Arc}; use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; -use crate::fb::FbLayout; +use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; use crate::fb::SysmemFlush; -use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; +use crate::gsp::Gsp; use crate::regs; use crate::util; -use crate::vbios::Vbios; use core::fmt; macro_rules! define_chipset { @@ -172,133 +170,58 @@ pub(crate) struct Gpu { /// System memory page required for flushing all pending GPU-side memory writes done through /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). sysmem_flush: SysmemFlush, + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon, + /// GSP runtime data. Temporarily an empty placeholder. + #[pin] + gsp: Gsp, } impl Gpu { - /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly - /// created the WPR2 region. - /// - /// TODO: this needs to be moved into a larger type responsible for booting the whole GSP - /// (`GspBooter`?). - fn run_fwsec_frts( - dev: &device::Device, - falcon: &Falcon, - bar: &Bar0, - bios: &Vbios, - fb_layout: &FbLayout, - ) -> Result<()> { - // Check that the WPR2 region does not already exists - if it does, we cannot run - // FWSEC-FRTS until the GPU is reset. - if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { - dev_err!( - dev, - "WPR2 region already exists - GPU needs to be reset to proceed\n" - ); - return Err(EBUSY); - } - - let fwsec_frts = FwsecFirmware::new( - dev, - falcon, - bar, - bios, - FwsecCommand::Frts { - frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.end - fb_layout.frts.start, - }, - )?; - - // Run FWSEC-FRTS to create the WPR2 region. - fwsec_frts.run(dev, falcon, bar)?; - - // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); - if frts_status != 0 { - dev_err!( - dev, - "FWSEC-FRTS returned with error code {:#x}", - frts_status - ); - - return Err(EIO); - } - - // Check that the WPR2 region has been created as we requested. - let (wpr2_lo, wpr2_hi) = ( - regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), - regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), - ); - - match (wpr2_lo, wpr2_hi) { - (_, 0) => { - dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); - - Err(EIO) - } - (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { - dev_err!( - dev, - "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", - wpr2_lo, - fb_layout.frts.start, - ); - - Err(EIO) - } - (wpr2_lo, wpr2_hi) => { - dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); - dev_dbg!(dev, "GPU instance built\n"); - - Ok(()) - } - } - } - - pub(crate) fn new( - pdev: &pci::Device, + pub(crate) fn new<'a>( + pdev: &'a pci::Device, devres_bar: Arc>, - ) -> Result> { - let bar = devres_bar.access(pdev.as_ref())?; - let spec = Spec::new(bar)?; - let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; + bar: &'a Bar0, + ) -> impl PinInit + 'a { + try_pin_init!(Self { + spec: Spec::new(bar).inspect(|spec| { + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + })?, - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + }, - // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. - gfw::wait_gfw_boot_completion(bar) - .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + fw <- Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?, - let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, - let gsp_falcon = Falcon::::new( - pdev.as_ref(), - spec.chipset, - bar, - spec.chipset > Chipset::GA100, - )?; - gsp_falcon.clear_swgen0_intr(bar); + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - let _sec2_falcon = Falcon::::new(pdev.as_ref(), spec.chipset, bar, true)?; + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, - let fb_layout = FbLayout::new(spec.chipset, bar)?; - dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); + gsp <- Gsp::new(), - let bios = Vbios::new(pdev.as_ref(), bar)?; + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, - Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; - - Ok(pin_init!(Self { - spec, bar: devres_bar, - fw, - sysmem_flush, - })) + }) } /// Called when the corresponding [`Device`](device::Device) is unbound. diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..33cc4f2011c1 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::prelude::*; + +/// GSP runtime data. +/// +/// This is an empty pinned placeholder for now. +#[pin_data] +pub(crate) struct Gsp {} + +impl Gsp { + pub(crate) fn new() -> impl PinInit { + pin_init!(Self {}) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..6625a11ccc88 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::device; +use kernel::pci; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; +use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; +use crate::gpu::Chipset; +use crate::regs; +use crate::vbios::Vbios; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device, + falcon: &Falcon, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon return, the GSP is up and running, and its runtime object given as return value. + pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon, + _sec2_falcon: &Falcon, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let fb_layout = FbLayout::new(chipset, bar)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index cb2bbb30cba1..fffcaee2249f 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -9,6 +9,7 @@ mod firmware; mod gfw; mod gpu; +mod gsp; mod regs; mod util; mod vbios; From b345c917d7c1e3841d286560afca476ab7caff74 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:16 +0900 Subject: [PATCH 57/64] gpu: nova-core: add Chipset::name() method There are a few cases where we need the lowercase name of a given chipset, notably to resolve firmware files paths for dynamic loading or to build the module information. So far, we relied on a static `NAMES` array for the latter, and some CString hackery for the former. Replace both with a new `name` const method that returns the lowercase name of a chipset instance. We can generate it using the `paste!` macro. Using this method removes the need to create a `CString` when loading firmware, and lets us remove a couple of utility functions that now have no user. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-3-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware.rs | 8 +++----- drivers/gpu/nova-core/gpu.rs | 25 +++++++++++++++++-------- drivers/gpu/nova-core/util.rs | 20 -------------------- 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 2931912ddba0..213d4506a53f 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -30,9 +30,7 @@ pub(crate) struct Firmware { impl Firmware { pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { - let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?; - chip_name.make_ascii_lowercase(); - let chip_name = &*chip_name; + let chip_name = chipset.name(); let request = |name_| { CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) @@ -180,8 +178,8 @@ pub(crate) const fn create( let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; - while i < gpu::Chipset::NAMES.len() { - this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); i += 1; } diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 3c019df01d30..d2395335727a 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -9,7 +9,6 @@ use crate::gfw; use crate::gsp::Gsp; use crate::regs; -use crate::util; use core::fmt; macro_rules! define_chipset { @@ -26,13 +25,23 @@ impl Chipset { $( Chipset::$variant, )* ]; - pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ - $( util::const_bytes_to_str( - util::to_lowercase_bytes::<{ stringify!($variant).len() }>( - stringify!($variant) - ).as_slice() - ), )* - ]; + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); } // TODO[FPRI]: replace with something like derive(FromPrimitive) diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 76cedf3710d7..bf35f00cb732 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -3,26 +3,6 @@ use kernel::prelude::*; use kernel::time::{Delta, Instant, Monotonic}; -pub(crate) const fn to_lowercase_bytes(s: &str) -> [u8; N] { - let src = s.as_bytes(); - let mut dst = [0; N]; - let mut i = 0; - - while i < src.len() && i < N { - dst[i] = (src[i] as char).to_ascii_lowercase() as u8; - i += 1; - } - - dst -} - -pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { - match core::str::from_utf8(bytes) { - Ok(string) => string, - Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), - } -} - /// Wait until `cond` is true or `timeout` elapsed. /// /// When `cond` evaluates to `Some`, its return value is returned. From ebe658732c6325aa3f4063726af00c0d12449d3c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:17 +0900 Subject: [PATCH 58/64] gpu: nova-core: firmware: move firmware request code into a function When all the firmware files are loaded from `Firmware::new`, it makes sense to have the firmware request code as a closure. However, since we eventually want each individual firmware constructor to request its own file (and get rid of `Firmware` altogether), move this code into a dedicated function that can be called by individual firmware types. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-4-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 213d4506a53f..8e40a671eca8 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -19,6 +19,19 @@ pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. +fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) +} + /// Structure encapsulating the firmware blobs required for the GPU to operate. #[expect(dead_code)] pub(crate) struct Firmware { @@ -30,12 +43,7 @@ pub(crate) struct Firmware { impl Firmware { pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { - let chip_name = chipset.name(); - - let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) - .and_then(|path| firmware::Firmware::request(&path, dev)) - }; + let request = |name| request_firmware(dev, chipset, name, ver); Ok(Firmware { booter_load: request("booter_load")?, From d6cb7319e64eecbdaa63e54a38dec03b30a4fdfd Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:18 +0900 Subject: [PATCH 59/64] gpu: nova-core: firmware: add support for common firmware header Several firmware files loaded from userspace feature a common header that describes their payload. Add basic support for it so subsequent patches can leverage it. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-5-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware.rs | 62 +++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 8e40a671eca8..dbc62c00d95b 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -4,11 +4,13 @@ //! to be loaded into a given execution unit. use core::marker::PhantomData; +use core::mem::size_of; use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use kernel::transmute::FromBytes; use crate::dma::DmaObject; use crate::falcon::FalconFirmware; @@ -156,6 +158,66 @@ fn no_patch_signature(self) -> FirmwareDmaObject { } } +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. + bin_magic: u32, + /// Version of the header. + bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +// A firmware blob starting with a `BinHdr`. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +#[expect(dead_code)] +impl<'a> BinFirmware<'a> { + /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. + fn data(&self) -> Option<&[u8]> { + let fw_start = self.hdr.data_offset as usize; + let fw_size = self.hdr.data_size as usize; + + self.fw.get(fw_start..fw_start + fw_size) + } +} + pub(crate) struct ModInfoBuilder(firmware::ModInfoBuilder); impl ModInfoBuilder { From 3e5c9681bf86461d26e9db85eeb9b8cbaa256e6a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:19 +0900 Subject: [PATCH 60/64] gpu: nova-core: firmware: process Booter and patch its signature The Booter signed firmware is an essential part of bringing up the GSP on Turing and Ampere. It is loaded on the sec2 falcon core and is responsible for loading and running the RISC-V GSP bootloader into the GSP core. Add support for parsing the Booter firmware loaded from userspace, patch its signatures, and store it into a form that is ready to be loaded and executed on the sec2 falcon. Then, move the Booter instance from the `Firmware` struct to the `start_gsp` method since it doesn't need to be kept after the GSP is booted. We do not run Booter yet, as its own payload (the GSP bootloader and firmware image) still need to be prepared. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-6-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/falcon.rs | 4 +- drivers/gpu/nova-core/firmware.rs | 6 +- drivers/gpu/nova-core/firmware/booter.rs | 375 +++++++++++++++++++++++ drivers/gpu/nova-core/gsp/boot.rs | 17 +- 4 files changed, 393 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/nova-core/firmware/booter.rs diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index b16207e7242f..37e6298195e4 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -293,7 +293,7 @@ pub(crate) trait FalconEngine: } /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconLoadTarget { /// Offset from the start of the source object to copy from. pub(crate) src_start: u32, @@ -304,7 +304,7 @@ pub(crate) struct FalconLoadTarget { } /// Parameters for the falcon boot ROM. -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconBromParams { /// Offset in `DMEM`` of the firmware's signature. pub(crate) pkc_data_offset: u32, diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index dbc62c00d95b..d821744d9961 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -17,6 +17,7 @@ use crate::gpu; use crate::gpu::Chipset; +pub(crate) mod booter; pub(crate) mod fwsec; pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; @@ -37,8 +38,6 @@ fn request_firmware( /// Structure encapsulating the firmware blobs required for the GPU to operate. #[expect(dead_code)] pub(crate) struct Firmware { - booter_load: firmware::Firmware, - booter_unload: firmware::Firmware, bootloader: firmware::Firmware, gsp: firmware::Firmware, } @@ -48,8 +47,6 @@ pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { fw: &'a [u8], } -#[expect(dead_code)] impl<'a> BinFirmware<'a> { /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the /// corresponding [`BinFirmware`] that can be used to extract its payload. diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..b4ff1b17e4a0 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). + +use core::marker::PhantomData; +use core::mem::size_of; +use core::ops::Deref; + +use kernel::device; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::sec2::Sec2; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::gpu::Chipset; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at(slice: &[u8], offset: usize) -> Result { + slice + .get(offset..offset + size_of::()) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. + sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result { + frombytes_at::(bin_fw.fw, bin_fw.hdr.header_offset as usize) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. + fn patch_location(&self) -> Result { + frombytes_at::(self.fw, self.hdr.patch_loc_offset as usize) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result>> { + let num_sig = frombytes_at::(self.fw, self.hdr.num_sig_offset as usize)?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. + None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = frombytes_at::(self.fw, self.hdr.patch_sig_offset as usize)?; + let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .ok_or(EINVAL)? + .chunks_exact(sig_size as usize) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result { + let start = hs_fw.hdr.meta_data_offset as usize; + let end = start + .checked_add(hs_fw.hdr.meta_data_size as usize) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result { + frombytes_at::(hs_fw.fw, hs_fw.hdr.header_offset as usize) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::( + hs_fw.fw, + (hs_fw.hdr.header_offset as usize) + // Skip the load header... + .checked_add(size_of::()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset.checked_add((idx as usize).checked_mul(size_of::())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. Their size is encoded into the header and not known a compile +/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for `IMEM` falcon memory. + imem_load_target: FalconLoadTarget, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. + ucode: FirmwareDmaObject, +} + +impl FirmwareDmaObject { + fn new_booter(dev: &device::Device, data: &[u8]) -> Result { + DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. + pub(crate) fn new( + dev: &device::Device, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<::Target>, + bar: &Bar0, + ) -> Result { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(|data| FirmwareDmaObject::::new_booter(dev, data))?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. + const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. + reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx as usize) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc as usize)? + } + }; + + Ok(Self { + imem_load_target: FalconLoadTarget { + src_start: app0.offset, + dst_start: 0, + len: app0.len, + }, + dmem_load_target: FalconLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconLoadParams for BooterFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + self.imem_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + self.dmem_load_target.clone() + } + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + self.imem_load_target.src_start + } +} + +impl Deref for BooterFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 6625a11ccc88..faa553635b7c 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -7,7 +7,11 @@ use crate::driver::Bar0; use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; use crate::fb::FbLayout; -use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; +use crate::firmware::{ + booter::{BooterFirmware, BooterKind}, + fwsec::{FwsecCommand, FwsecFirmware}, + FIRMWARE_VERSION, +}; use crate::gpu::Chipset; use crate::regs; use crate::vbios::Vbios; @@ -102,7 +106,7 @@ pub(crate) fn boot( bar: &Bar0, chipset: Chipset, gsp_falcon: &Falcon, - _sec2_falcon: &Falcon, + sec2_falcon: &Falcon, ) -> Result { let dev = pdev.as_ref(); @@ -113,6 +117,15 @@ pub(crate) fn boot( Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + let _booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + Ok(()) } } From a841614e607c9e232dd56ec726ba63d2750025a2 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:20 +0900 Subject: [PATCH 61/64] gpu: nova-core: firmware: process and prepare the GSP firmware The GSP firmware is a binary blob that is verified, loaded, and run by the GSP bootloader. Its presentation is a bit peculiar as the GSP bootloader expects to be given a DMA address to a 3-levels page table mapping the GSP firmware at address 0 of its own address space. Prepare such a structure containing the DMA-mapped firmware as well as the DMA-mapped page tables, and a way to obtain the DMA handle of the level 0 page table. Then, move the GSP firmware instance from the `Firmware` struct to the `start_gsp` method since it doesn't need to be kept after the GSP is booted. As we are performing the required ELF section parsing and radix3 page table building, remove these items from the TODO file. Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-7-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- Documentation/gpu/nova/core/todo.rst | 17 -- drivers/gpu/nova-core/firmware.rs | 3 +- drivers/gpu/nova-core/firmware/gsp.rs | 236 ++++++++++++++++++++++++++ drivers/gpu/nova-core/gsp.rs | 3 + drivers/gpu/nova-core/gsp/boot.rs | 6 + 5 files changed, 246 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/nova-core/firmware/gsp.rs diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index 48b20656dcb1..091a2fb78c6b 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -230,23 +230,6 @@ Rust abstraction for debugfs APIs. GPU (general) ============= -Parse firmware headers ----------------------- - -Parse ELF headers from the firmware files loaded from the filesystem. - -| Reference: ELF utils -| Complexity: Beginner -| Contact: Abdiel Janulgue - -Build radix3 page table ------------------------ - -Build the radix3 page table to map the firmware. - -| Complexity: Intermediate -| Contact: Abdiel Janulgue - Initial Devinit support ----------------------- diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index d821744d9961..10200d11bbbf 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -19,6 +19,7 @@ pub(crate) mod booter; pub(crate) mod fwsec; +pub(crate) mod gsp; pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; @@ -39,7 +40,6 @@ fn request_firmware( #[expect(dead_code)] pub(crate) struct Firmware { bootloader: firmware::Firmware, - gsp: firmware::Firmware, } impl Firmware { @@ -48,7 +48,6 @@ pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. + shdr.find(|&sh| { + let Some(hdr) = Elf64SHdr::from_bytes(sh) else { + return false; + }; + + let Some(name_idx) = strhdr + .0 + .sh_offset + .checked_add(u64::from(hdr.0.sh_name)) + .and_then(|idx| usize::try_from(idx).ok()) + else { + return false; + }; + + // Get the start of the name. + elf.get(name_idx..) + // Stop at the first `0`. + .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?)) + // Convert into CStr. This should never fail because of the line above. + .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok()) + // Convert into str. + .and_then(|c_str| c_str.to_str().ok()) + // Check that the name matches. + .map(|str| str == name) + .unwrap_or(false) + }) + // Return the slice containing the section. + .and_then(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. +/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. + #[pin] + level2: SGTable>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: DmaObject, + /// Size in bytes of the firmware contained in [`Self::fw`]. + size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + signatures: DmaObject, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a, 'b>( + dev: &'a device::Device, + chipset: Chipset, + ver: &'b str, + ) -> Result + 'a> { + let fw = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?; + + let sigs_section = match chipset.arch() { + Architecture::Ampere => ".fwsignature_ga10x", + _ => return Err(ENOTSUPP), + }; + let signatures = elf::elf64_section(fw.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| DmaObject::from_data(dev, data))?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into the + // device address space. + VVec::::with_capacity( + fw.iter().count() * core::mem::size_of::(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? + }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map it + // into the device address space. + VVec::::with_capacity( + level2.iter().count() * core::mem::size_of::(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Level 0 page table data. + let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; + + // Fill level 1 page entry. + #[allow(clippy::useless_conversion)] + let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let dst = &mut level0_data[..size_of_val(&level1_entry)]; + dst.copy_from_slice(&level1_entry.to_le_bytes()); + + // Turn the level0 page table into a [`DmaObject`]. + DmaObject::from_data(dev, &level0_data)? + }, + size, + signatures, + })) + } + + #[expect(unused)] + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. +/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable>>, mut dst: VVec) -> Result> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 33cc4f2011c1..917e8a83d293 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -4,6 +4,9 @@ use kernel::prelude::*; +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + /// GSP runtime data. /// /// This is an empty pinned placeholder for now. diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index faa553635b7c..2800f3aee37d 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -10,6 +10,7 @@ use crate::firmware::{ booter::{BooterFirmware, BooterKind}, fwsec::{FwsecCommand, FwsecFirmware}, + gsp::GspFirmware, FIRMWARE_VERSION, }; use crate::gpu::Chipset; @@ -112,6 +113,11 @@ pub(crate) fn boot( let bios = Vbios::new(dev, bar)?; + let _gsp_fw = KBox::pin_init( + GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, + GFP_KERNEL, + )?; + let fb_layout = FbLayout::new(chipset, bar)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); From 015b1d36505a1ad6830dc2ead65db2d36a209c23 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:21 +0900 Subject: [PATCH 62/64] gpu: nova-core: firmware: process the GSP bootloader The GSP bootloader is a small RISC-V firmware that is loaded by Booter onto the GSP core and is in charge of loading, validating, and starting the actual GSP firmware. It is a regular binary firmware file containing a specific header. Create a type holding the DMA-mapped firmware as well as useful information extracted from the header, and hook it into our firmware structure for later use. The GSP bootloader is stored into the `GspFirmware` structure, since it is part of the GSP firmware package. This makes the `Firmware` structure empty, so remove it. Reviewed-by: John Hubbard Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-8-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware.rs | 18 +---- drivers/gpu/nova-core/firmware/gsp.rs | 7 ++ drivers/gpu/nova-core/firmware/riscv.rs | 91 +++++++++++++++++++++++++ drivers/gpu/nova-core/gpu.rs | 4 -- 4 files changed, 99 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/nova-core/firmware/riscv.rs diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 10200d11bbbf..dc9645a789b0 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -15,11 +15,11 @@ use crate::dma::DmaObject; use crate::falcon::FalconFirmware; use crate::gpu; -use crate::gpu::Chipset; pub(crate) mod booter; pub(crate) mod fwsec; pub(crate) mod gsp; +pub(crate) mod riscv; pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; @@ -36,22 +36,6 @@ fn request_firmware( .and_then(|path| firmware::Firmware::request(&path, dev)) } -/// Structure encapsulating the firmware blobs required for the GPU to operate. -#[expect(dead_code)] -pub(crate) struct Firmware { - bootloader: firmware::Firmware, -} - -impl Firmware { - pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result { - let request = |name| request_firmware(dev, chipset, name, ver); - - Ok(Firmware { - bootloader: request("bootloader")?, - }) - } -} - /// Structure used to describe some firmwares, notably FWSEC-FRTS. #[repr(C)] #[derive(Debug, Clone)] diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index a7eda9ca0d21..9b70095434c6 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -9,6 +9,7 @@ use kernel::scatterlist::{Owned, SGTable}; use crate::dma::DmaObject; +use crate::firmware::riscv::RiscvFirmware; use crate::gpu::{Architecture, Chipset}; use crate::gsp::GSP_PAGE_SIZE; @@ -131,6 +132,8 @@ pub(crate) struct GspFirmware { size: usize, /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. signatures: DmaObject, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + bootloader: RiscvFirmware, } impl GspFirmware { @@ -164,6 +167,9 @@ pub(crate) fn new<'a, 'b>( }) .map_err(|_| ENOMEM)?; + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + let bootloader = RiscvFirmware::new(dev, &bl)?; + Ok(try_pin_init!(Self { fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), level2 <- { @@ -206,6 +212,7 @@ pub(crate) fn new<'a, 'b>( }, size, signatures, + bootloader, })) } diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..afb08f5bc4ba --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a +//! dedicated header. + +use core::mem::size_of; + +use kernel::device; +use kernel::firmware::Firmware; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::firmware::BinFirmware; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result { + let offset = bin_fw.hdr.header_offset as usize; + + bin_fw + .fw + .get(offset..offset + size_of::()) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. +#[expect(unused)] +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + code_offset: u32, + /// Offset at which the data starts in the firmware image. + data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + manifest_offset: u32, + /// Application version. + app_version: u32, + /// Device-mapped firmware image. + ucode: DmaObject, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. + pub(crate) fn new(dev: &device::Device, fw: &Firmware) -> Result { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = bin_fw.hdr.data_offset as usize; + let len = bin_fw.hdr.data_size as usize; + + DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index d2395335727a..5da9ad726483 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -5,7 +5,6 @@ use crate::driver::Bar0; use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; use crate::fb::SysmemFlush; -use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; use crate::gsp::Gsp; use crate::regs; @@ -175,7 +174,6 @@ pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 bar: Arc>, - fw: Firmware, /// System memory page required for flushing all pending GPU-side memory writes done through /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). sysmem_flush: SysmemFlush, @@ -211,8 +209,6 @@ pub(crate) fn new<'a>( .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; }, - fw <- Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?, - sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, gsp_falcon: Falcon::new( From cb05748b4295d811e47f3adc324b021a919cd4d5 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 13 Sep 2025 23:12:22 +0900 Subject: [PATCH 63/64] gpu: nova-core: firmware: use 570.144 firmware 570.144 is the latest available into linux-firmware as of this commit, and the one we will use to start development of nova-core. It should eventually be dropped for a newer version before the driver becomes able to do anything useful. The newer firmware is expected to iron out some of the inelegances of 570.144, notably related to packaging. Reviewed-by: John Hubbard Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-9-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/firmware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index dc9645a789b0..4179a74a2342 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -21,7 +21,7 @@ pub(crate) mod gsp; pub(crate) mod riscv; -pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; /// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. fn request_firmware( From 299eb32863e584cfff7c6b667c3e92ae7d4d2bf9 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Sat, 13 Sep 2025 23:12:23 +0900 Subject: [PATCH 64/64] gpu: nova-core: Add base files for r570.144 firmware bindings Interacting with the GSP currently requires using definitions from C header files. Rust definitions for the types needed for Nova core will be generated using the Rust bindgen tool. This patch adds the base module to allow inclusion of the generated bindings. The generated bindings themselves are added by subsequent patches when they are first used. Currently we only intend to support a single firmware version, 570.144, with these bindings. Longer term we intend to move to a more stable GSP interface that isn't tied to specific firmware versions. Signed-off-by: Alistair Popple Reviewed-by: John Hubbard [acourbot@nvidia.com: adapt the bindings module comment a bit] Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250913-nova_firmware-v6-10-9007079548b0@nvidia.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/gsp.rs | 2 ++ drivers/gpu/nova-core/gsp/fw.rs | 7 +++++ drivers/gpu/nova-core/gsp/fw/r570_144.rs | 29 +++++++++++++++++++ .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 1 + 4 files changed, 39 insertions(+) create mode 100644 drivers/gpu/nova-core/gsp/fw.rs create mode 100644 drivers/gpu/nova-core/gsp/fw/r570_144.rs create mode 100644 drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 917e8a83d293..64e472e7a9d3 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -4,6 +4,8 @@ use kernel::prelude::*; +mod fw; + pub(crate) const GSP_PAGE_SHIFT: usize = 12; pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs new file mode 100644 index 000000000000..34226dd00982 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod r570_144; + +// Alias to avoid repeating the version number with every use. +#[expect(unused)] +use r570_144 as bindings; diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..35cb0370a7c9 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. + +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + dead_code, + unused_imports, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..cec594032515 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1 @@ +// SPDX-License-Identifier: GPL-2.0