mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	RDMA/device: Add ib_device_get_by_netdev()
Several drivers need to find the ib_device from a given netdev. rxe needs this at speed in an unsleepable context, so choose to implement the translation using an RCU-safe hash table. The hash table can have a many-to-one mapping. This is intended to support some future case where multiple IB drivers (i.e. iWARP and RoCE) connect to the same netdevs. driver_ids will need to be different to support this. In the process this makes the struct ib_device and ib_port_data RCU-safe by deferring their kfrees. Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
		
							parent
							
								
									c2261dd76b
								
							
						
					
					
						commit
						324e227ea7
					
				
					 2 changed files with 116 additions and 13 deletions
				
			
		| 
						 | 
				
			
			@ -40,6 +40,7 @@
 | 
			
		|||
#include <linux/netdevice.h>
 | 
			
		||||
#include <linux/security.h>
 | 
			
		||||
#include <linux/notifier.h>
 | 
			
		||||
#include <linux/hashtable.h>
 | 
			
		||||
#include <rdma/rdma_netlink.h>
 | 
			
		||||
#include <rdma/ib_addr.h>
 | 
			
		||||
#include <rdma/ib_cache.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -134,6 +135,10 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
 | 
			
		|||
	     !xa_is_err(entry);                                                \
 | 
			
		||||
	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 | 
			
		||||
 | 
			
		||||
/* RCU hash table mapping netdevice pointers to struct ib_port_data */
 | 
			
		||||
static DEFINE_SPINLOCK(ndev_hash_lock);
 | 
			
		||||
static DECLARE_HASHTABLE(ndev_hash, 5);
 | 
			
		||||
 | 
			
		||||
static void free_netdevs(struct ib_device *ib_dev);
 | 
			
		||||
static int ib_security_change(struct notifier_block *nb, unsigned long event,
 | 
			
		||||
			      void *lsm_data);
 | 
			
		||||
| 
						 | 
				
			
			@ -144,6 +149,12 @@ static struct notifier_block ibdev_lsm_nb = {
 | 
			
		|||
	.notifier_call = ib_security_change,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* Pointer to the RCU head at the start of the ib_port_data array */
 | 
			
		||||
struct ib_port_data_rcu {
 | 
			
		||||
	struct rcu_head rcu_head;
 | 
			
		||||
	struct ib_port_data pdata[];
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static int ib_device_check_mandatory(struct ib_device *device)
 | 
			
		||||
{
 | 
			
		||||
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
 | 
			
		||||
| 
						 | 
				
			
			@ -295,9 +306,12 @@ static void ib_device_release(struct device *device)
 | 
			
		|||
	WARN_ON(refcount_read(&dev->refcount));
 | 
			
		||||
	ib_cache_release_one(dev);
 | 
			
		||||
	ib_security_release_port_pkey_list(dev);
 | 
			
		||||
	kfree(dev->port_data);
 | 
			
		||||
	xa_destroy(&dev->client_data);
 | 
			
		||||
	kfree(dev);
 | 
			
		||||
	if (dev->port_data)
 | 
			
		||||
		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
 | 
			
		||||
				       pdata[0]),
 | 
			
		||||
			  rcu_head);
 | 
			
		||||
	kfree_rcu(dev, rcu_head);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ib_device_uevent(struct device *device,
 | 
			
		||||
| 
						 | 
				
			
			@ -468,6 +482,7 @@ static void remove_client_context(struct ib_device *device,
 | 
			
		|||
 | 
			
		||||
static int alloc_port_data(struct ib_device *device)
 | 
			
		||||
{
 | 
			
		||||
	struct ib_port_data_rcu *pdata_rcu;
 | 
			
		||||
	unsigned int port;
 | 
			
		||||
 | 
			
		||||
	if (device->port_data)
 | 
			
		||||
| 
						 | 
				
			
			@ -484,17 +499,26 @@ static int alloc_port_data(struct ib_device *device)
 | 
			
		|||
	 * Therefore port_data is declared as a 1 based array with potential
 | 
			
		||||
	 * empty slots at the beginning.
 | 
			
		||||
	 */
 | 
			
		||||
	device->port_data = kcalloc(rdma_end_port(device) + 1,
 | 
			
		||||
				    sizeof(*device->port_data), GFP_KERNEL);
 | 
			
		||||
	if (!device->port_data)
 | 
			
		||||
	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
 | 
			
		||||
					rdma_end_port(device) + 1),
 | 
			
		||||
			    GFP_KERNEL);
 | 
			
		||||
	if (!pdata_rcu)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
	/*
 | 
			
		||||
	 * The rcu_head is put in front of the port data array and the stored
 | 
			
		||||
	 * pointer is adjusted since we never need to see that member until
 | 
			
		||||
	 * kfree_rcu.
 | 
			
		||||
	 */
 | 
			
		||||
	device->port_data = pdata_rcu->pdata;
 | 
			
		||||
 | 
			
		||||
	rdma_for_each_port (device, port) {
 | 
			
		||||
		struct ib_port_data *pdata = &device->port_data[port];
 | 
			
		||||
 | 
			
		||||
		pdata->ib_dev = device;
 | 
			
		||||
		spin_lock_init(&pdata->pkey_list_lock);
 | 
			
		||||
		INIT_LIST_HEAD(&pdata->pkey_list);
 | 
			
		||||
		spin_lock_init(&pdata->netdev_lock);
 | 
			
		||||
		INIT_HLIST_NODE(&pdata->ndev_hash_link);
 | 
			
		||||
	}
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1042,6 +1066,29 @@ int ib_query_port(struct ib_device *device,
 | 
			
		|||
}
 | 
			
		||||
EXPORT_SYMBOL(ib_query_port);
 | 
			
		||||
 | 
			
		||||
static void add_ndev_hash(struct ib_port_data *pdata)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long flags;
 | 
			
		||||
 | 
			
		||||
	might_sleep();
 | 
			
		||||
 | 
			
		||||
	spin_lock_irqsave(&ndev_hash_lock, flags);
 | 
			
		||||
	if (hash_hashed(&pdata->ndev_hash_link)) {
 | 
			
		||||
		hash_del_rcu(&pdata->ndev_hash_link);
 | 
			
		||||
		spin_unlock_irqrestore(&ndev_hash_lock, flags);
 | 
			
		||||
		/*
 | 
			
		||||
		 * We cannot do hash_add_rcu after a hash_del_rcu until the
 | 
			
		||||
		 * grace period
 | 
			
		||||
		 */
 | 
			
		||||
		synchronize_rcu();
 | 
			
		||||
		spin_lock_irqsave(&ndev_hash_lock, flags);
 | 
			
		||||
	}
 | 
			
		||||
	if (pdata->netdev)
 | 
			
		||||
		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
 | 
			
		||||
			     (uintptr_t)pdata->netdev);
 | 
			
		||||
	spin_unlock_irqrestore(&ndev_hash_lock, flags);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
 | 
			
		||||
 * @ib_dev: Device to modify
 | 
			
		||||
| 
						 | 
				
			
			@ -1078,17 +1125,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 | 
			
		|||
 | 
			
		||||
	pdata = &ib_dev->port_data[port];
 | 
			
		||||
	spin_lock_irqsave(&pdata->netdev_lock, flags);
 | 
			
		||||
	if (pdata->netdev == ndev) {
 | 
			
		||||
	old_ndev = rcu_dereference_protected(
 | 
			
		||||
		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
 | 
			
		||||
	if (old_ndev == ndev) {
 | 
			
		||||
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
	old_ndev = pdata->netdev;
 | 
			
		||||
 | 
			
		||||
	if (ndev)
 | 
			
		||||
		dev_hold(ndev);
 | 
			
		||||
	pdata->netdev = ndev;
 | 
			
		||||
	rcu_assign_pointer(pdata->netdev, ndev);
 | 
			
		||||
	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 | 
			
		||||
 | 
			
		||||
	add_ndev_hash(pdata);
 | 
			
		||||
	if (old_ndev)
 | 
			
		||||
		dev_put(old_ndev);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1103,11 +1152,24 @@ static void free_netdevs(struct ib_device *ib_dev)
 | 
			
		|||
 | 
			
		||||
	rdma_for_each_port (ib_dev, port) {
 | 
			
		||||
		struct ib_port_data *pdata = &ib_dev->port_data[port];
 | 
			
		||||
		struct net_device *ndev;
 | 
			
		||||
 | 
			
		||||
		spin_lock_irqsave(&pdata->netdev_lock, flags);
 | 
			
		||||
		if (pdata->netdev) {
 | 
			
		||||
			dev_put(pdata->netdev);
 | 
			
		||||
			pdata->netdev = NULL;
 | 
			
		||||
		ndev = rcu_dereference_protected(
 | 
			
		||||
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
 | 
			
		||||
		if (ndev) {
 | 
			
		||||
			spin_lock(&ndev_hash_lock);
 | 
			
		||||
			hash_del_rcu(&pdata->ndev_hash_link);
 | 
			
		||||
			spin_unlock(&ndev_hash_lock);
 | 
			
		||||
 | 
			
		||||
			/*
 | 
			
		||||
			 * If this is the last dev_put there is still a
 | 
			
		||||
			 * synchronize_rcu before the netdev is kfreed, so we
 | 
			
		||||
			 * can continue to rely on unlocked pointer
 | 
			
		||||
			 * comparisons after the put
 | 
			
		||||
			 */
 | 
			
		||||
			rcu_assign_pointer(pdata->netdev, NULL);
 | 
			
		||||
			dev_put(ndev);
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -1132,7 +1194,8 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 | 
			
		|||
		res = ib_dev->ops.get_netdev(ib_dev, port);
 | 
			
		||||
	else {
 | 
			
		||||
		spin_lock(&pdata->netdev_lock);
 | 
			
		||||
		res = pdata->netdev;
 | 
			
		||||
		res = rcu_dereference_protected(
 | 
			
		||||
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
 | 
			
		||||
		if (res)
 | 
			
		||||
			dev_hold(res);
 | 
			
		||||
		spin_unlock(&pdata->netdev_lock);
 | 
			
		||||
| 
						 | 
				
			
			@ -1150,6 +1213,38 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 | 
			
		|||
	return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ib_device_get_by_netdev - Find an IB device associated with a netdev
 | 
			
		||||
 * @ndev: netdev to locate
 | 
			
		||||
 * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
 | 
			
		||||
 *
 | 
			
		||||
 * Find and hold an ib_device that is associated with a netdev via
 | 
			
		||||
 * ib_device_set_netdev(). The caller must call ib_device_put() on the
 | 
			
		||||
 * returned pointer.
 | 
			
		||||
 */
 | 
			
		||||
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
 | 
			
		||||
					  enum rdma_driver_id driver_id)
 | 
			
		||||
{
 | 
			
		||||
	struct ib_device *res = NULL;
 | 
			
		||||
	struct ib_port_data *cur;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
 | 
			
		||||
				    (uintptr_t)ndev) {
 | 
			
		||||
		if (rcu_access_pointer(cur->netdev) == ndev &&
 | 
			
		||||
		    (driver_id == RDMA_DRIVER_UNKNOWN ||
 | 
			
		||||
		     cur->ib_dev->driver_id == driver_id) &&
 | 
			
		||||
		    ib_device_try_get(cur->ib_dev)) {
 | 
			
		||||
			res = cur->ib_dev;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	return res;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(ib_device_get_by_netdev);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ib_enum_roce_netdev - enumerate all RoCE ports
 | 
			
		||||
 * @ib_dev : IB device we want to query
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2198,6 +2198,8 @@ struct ib_port_immutable {
 | 
			
		|||
};
 | 
			
		||||
 | 
			
		||||
struct ib_port_data {
 | 
			
		||||
	struct ib_device *ib_dev;
 | 
			
		||||
 | 
			
		||||
	struct ib_port_immutable immutable;
 | 
			
		||||
 | 
			
		||||
	spinlock_t pkey_list_lock;
 | 
			
		||||
| 
						 | 
				
			
			@ -2206,7 +2208,8 @@ struct ib_port_data {
 | 
			
		|||
	struct ib_port_cache cache;
 | 
			
		||||
 | 
			
		||||
	spinlock_t netdev_lock;
 | 
			
		||||
	struct net_device *netdev;
 | 
			
		||||
	struct net_device __rcu *netdev;
 | 
			
		||||
	struct hlist_node ndev_hash_link;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* rdma netdev type - specifies protocol type */
 | 
			
		||||
| 
						 | 
				
			
			@ -2545,6 +2548,7 @@ struct ib_device {
 | 
			
		|||
	struct device                *dma_device;
 | 
			
		||||
	struct ib_device_ops	     ops;
 | 
			
		||||
	char                          name[IB_DEVICE_NAME_MAX];
 | 
			
		||||
	struct rcu_head rcu_head;
 | 
			
		||||
 | 
			
		||||
	struct list_head              event_handler_list;
 | 
			
		||||
	spinlock_t                    event_handler_lock;
 | 
			
		||||
| 
						 | 
				
			
			@ -3996,6 +4000,10 @@ static inline bool ib_device_try_get(struct ib_device *dev)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
void ib_device_put(struct ib_device *device);
 | 
			
		||||
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
 | 
			
		||||
					  enum rdma_driver_id driver_id);
 | 
			
		||||
struct ib_device *ib_device_get_by_name(const char *name,
 | 
			
		||||
					enum rdma_driver_id driver_id);
 | 
			
		||||
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
 | 
			
		||||
					    u16 pkey, const union ib_gid *gid,
 | 
			
		||||
					    const struct sockaddr *addr);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue