linux/drivers/infiniband/hw/mlx5/ib_rep.c
Chiara Meiohas 8d159eb211 RDMA/mlx5: Use IB set_netdev and get_netdev functions
The IB layer provides a common interface for storing and retrieving the
net device associated with an IB device port (ib_device_set_netdev()
and ib_device_get_netdev()). Previously, mlx5_ib stored and managed the
associated net devices internally.

Replace internal net device management in mlx5_ib with
ib_device_set_netdev() when attaching/detaching a net device and
ib_device_get_netdev() when retrieving the net device.
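
The resulting usage pattern looks roughly like this (a minimal sketch,
not the actual mlx5_ib hunks; the example_* helpers are illustrative
only and assume <rdma/ib_verbs.h> for the IB core helpers):

        /* Attach: let the IB core own the port <-> netdev mapping instead
         * of a driver-private cache.
         */
        static int example_attach_netdev(struct ib_device *ibdev,
                                         struct net_device *ndev, u32 port)
        {
                return ib_device_set_netdev(ibdev, ndev, port);
        }

        /* Retrieve: the IB core returns the netdev with a reference held,
         * so the caller is expected to dev_put() it when done.
         */
        static struct net_device *example_get_netdev(struct ib_device *ibdev,
                                                     u32 port)
        {
                return ib_device_get_netdev(ibdev, port);
        }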

Export ib_device_get_netdev().

For mlx5 representors/PFs/VFs and for lag creation, the netdev
assignments are replaced with the IB set/get netdev functions.

In active-backup lag mode, the active slave net device is stored in the
lag itself. To ensure that the net device stored in a lag bond IB device
is the active slave, the following is implemented (see the sketch after
this list):
- mlx5_core: when the active slave of a bond is modified, send the
  internal driver event
  MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE.
- mlx5_ib: when catching the event, call ib_device_set_netdev().
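
Roughly, the mlx5_ib side behaves as in the sketch below (the handler
name and the event payload are illustrative assumptions, not the exact
upstream code): the lowerstate event carries the new active slave, and
the bond IB device's mapping is refreshed via ib_device_set_netdev().

        /* Illustrative only: assumes the event payload is the new active
         * slave's struct net_device *; the real payload layout may differ.
         * Assumes the mlx5_ib_dev definition from the driver's headers.
         */
        static void example_lag_lowerstate_event(struct mlx5_ib_dev *ibdev,
                                                 void *data)
        {
                struct net_device *new_active = data;

                /* Keep the bond IB device's port pointing at the active
                 * slave (port 1: single-port bond device in this sketch).
                 */
                ib_device_set_netdev(&ibdev->ib_dev, new_active, 1);
        }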

This patch also ensures the correct IB events are sent in switchdev lag.

While at it, note that in multiport eswitch mode only a single IB device
is created for all ports. Once loaded, that IB device receives the netdev
events of all its VFs, so to avoid overwriting the mapping of the PF IB
device to the PF netdev, NETDEV_REGISTER events are ignored if the IB
device is already mapped to a netdev.
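
The check amounts to something like the sketch below (the example_*
name is illustrative): the netdev notifier asks the IB core whether the
port already has a bound netdev and, if so, ignores NETDEV_REGISTER.

        /* Illustrative only: returns true if the IB port already has a
         * netdev bound, in which case NETDEV_REGISTER should be ignored.
         */
        static bool example_ignore_netdev_register(struct mlx5_ib_dev *ibdev,
                                                   u32 port)
        {
                struct net_device *cur;

                cur = ib_device_get_netdev(&ibdev->ib_dev, port);
                if (!cur)
                        return false;

                dev_put(cur);   /* get_netdev returned a held reference */
                return true;
        }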

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://patch.msgid.link/20240909173025.30422-6-michaelgur@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-09-13 08:27:40 +03:00


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
                      struct mlx5_eswitch_rep *rep,
                      int vport_index)
{
        struct mlx5_ib_dev *ibdev;
        struct net_device *ndev;

        ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
        if (!ibdev)
                return -EINVAL;

        ibdev->port[vport_index].rep = rep;
        rep->rep_data[REP_IB].priv = ibdev;
        ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

        return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
        struct mlx5_core_dev *peer_dev;
        int i;

        mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
                u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

                if (mlx5_lag_is_mpesw(peer_dev))
                        *num_ports += peer_num_ports;
                else
                        /* Only 1 ib port is the representor for all uplinks */
                        *num_ports += peer_num_ports - 1;
        }
}

static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
        u32 num_ports = mlx5_eswitch_get_total_vports(dev);
        struct mlx5_core_dev *lag_master = dev;
        const struct mlx5_ib_profile *profile;
        struct mlx5_core_dev *peer_dev;
        struct mlx5_ib_dev *ibdev;
        int new_uplink = false;
        int vport_index;
        int ret;
        int i;

        vport_index = rep->vport_index;

        if (mlx5_lag_is_shared_fdb(dev)) {
                if (mlx5_lag_is_master(dev)) {
                        mlx5_ib_num_ports_update(dev, &num_ports);
                } else {
                        if (rep->vport == MLX5_VPORT_UPLINK) {
                                if (!mlx5_lag_is_mpesw(dev))
                                        return 0;
                                new_uplink = true;
                        }
                        mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
                                u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

                                if (mlx5_lag_is_master(peer_dev))
                                        lag_master = peer_dev;
                                else if (!mlx5_lag_is_mpesw(dev))
                                        /* Only 1 ib port is the representor for all uplinks */
                                        peer_n_ports--;

                                if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
                                        vport_index += peer_n_ports;
                        }
                }
        }

        if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
                profile = &raw_eth_profile;
        else
                return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);

        ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
        if (!ibdev)
                return -ENOMEM;

        ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
                              GFP_KERNEL);
        if (!ibdev->port) {
                ret = -ENOMEM;
                goto fail_port;
        }

        ibdev->is_rep = true;
        vport_index = rep->vport_index;
        ibdev->port[vport_index].rep = rep;
        ibdev->mdev = lag_master;
        ibdev->num_ports = num_ports;
        ibdev->ib_dev.phys_port_cnt = num_ports;
        ret = ib_device_set_netdev(&ibdev->ib_dev,
                                   mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
                                                          rep->vport),
                                   vport_index + 1);
        if (ret)
                goto fail_add;

        ret = __mlx5_ib_add(ibdev, profile);
        if (ret)
                goto fail_add;

        rep->rep_data[REP_IB].priv = ibdev;
        if (mlx5_lag_is_shared_fdb(lag_master))
                mlx5_ib_register_peer_vport_reps(lag_master);

        return 0;

fail_add:
        kfree(ibdev->port);
fail_port:
        ib_dealloc_device(&ibdev->ib_dev);
        return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
        return rep->rep_data[REP_IB].priv;
}

static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
        struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
        struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
        int vport_index = rep->vport_index;
        struct mlx5_ib_port *port;
        int i;

        if (WARN_ON(!mdev))
                return;

        if (!dev)
                return;

        if (mlx5_lag_is_shared_fdb(mdev) &&
            !mlx5_lag_is_master(mdev)) {
                if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
                        return;
                for (i = 0; i < dev->num_ports; i++) {
                        if (dev->port[i].rep == rep)
                                break;
                }
                if (WARN_ON(i == dev->num_ports))
                        return;
                vport_index = i;
        }

        port = &dev->port[vport_index];

        ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
        rep->rep_data[REP_IB].priv = NULL;
        port->rep = NULL;

        if (rep->vport == MLX5_VPORT_UPLINK) {
                if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
                        return;

                if (mlx5_lag_is_shared_fdb(mdev)) {
                        struct mlx5_core_dev *peer_mdev;
                        struct mlx5_eswitch *esw;

                        mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
                                esw = peer_mdev->priv.eswitch;
                                mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
                        }
                }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
        }
}

static const struct mlx5_eswitch_rep_ops rep_ops = {
        .load = mlx5_ib_vport_rep_load,
        .unload = mlx5_ib_vport_rep_unload,
        .get_proto_dev = mlx5_ib_rep_to_dev,
};

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
        struct mlx5_core_dev *peer_mdev;
        struct mlx5_eswitch *esw;
        int i;

        mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
                esw = peer_mdev->priv.eswitch;
                mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
        }
}

struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
                                          u16 vport_num)
{
        return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
                                                   struct mlx5_ib_sq *sq,
                                                   u32 port)
{
        struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
        struct mlx5_eswitch_rep *rep;

        if (!dev->is_rep || !port)
                return NULL;

        if (!dev->port[port - 1].rep)
                return ERR_PTR(-EINVAL);

        rep = dev->port[port - 1].rep;

        return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}

static int mlx5r_rep_probe(struct auxiliary_device *adev,
                           const struct auxiliary_device_id *id)
{
        struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
        struct mlx5_core_dev *mdev = idev->mdev;
        struct mlx5_eswitch *esw;

        esw = mdev->priv.eswitch;
        mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
        return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
        struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
        struct mlx5_core_dev *mdev = idev->mdev;
        struct mlx5_eswitch *esw;

        esw = mdev->priv.eswitch;
        mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
        { .name = MLX5_ADEV_NAME ".rdma-rep", },
        {},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
        .name = "rep",
        .probe = mlx5r_rep_probe,
        .remove = mlx5r_rep_remove,
        .id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
        return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
        auxiliary_driver_unregister(&mlx5r_rep_driver);
}