genirq/msi: Switch to new irq spreading infrastructure

Switch MSI over to the new spreading code. If a pci device contains a
valid pointer to a cpumask, then this mask is used for spreading;
otherwise the online cpu mask is used. This allows a driver to restrict
the spread to a subset of CPUs, e.g. cpus on a particular node.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: axboe@fb.com
Cc: keith.busch@intel.com
Cc: agordeev@redhat.com
Cc: linux-block@vger.kernel.org
Link: http://lkml.kernel.org/r/1473862739-15032-4-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 34c3d9819f
commit e75eafb9b0

2 changed files with 86 additions and 71 deletions
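To illustrate the changelog's driver-facing point, here is a minimal,
hypothetical sketch (foo_setup_irqs() and the vector count 32 are
invented for illustration; pci_alloc_irq_vectors() with
PCI_IRQ_AFFINITY is the driver entry point whose backends,
__pci_enable_msi_range() and __pci_enable_msix_range(), appear in the
diff below). The driver seeds dev->irq_affinity before allocating
vectors; left NULL, the core spreads across the online CPUs:

	#include <linux/cpumask.h>
	#include <linux/pci.h>
	#include <linux/slab.h>
	#include <linux/topology.h>

	/* Hypothetical driver sketch, not part of this commit. */
	static int foo_setup_irqs(struct pci_dev *pdev, int node)
	{
		struct cpumask *node_cpus;
		int nvec;

		node_cpus = kzalloc(cpumask_size(), GFP_KERNEL);
		if (!node_cpus)
			return -ENOMEM;
		cpumask_copy(node_cpus, cpumask_of_node(node));

		/* Restrict spreading to @node; NULL means cpu_online_mask. */
		pdev->irq_affinity = node_cpus;

		/*
		 * The core caps nvec via irq_calc_affinity_vectors() and
		 * builds one mask per vector via irq_create_affinity_masks().
		 */
		nvec = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_AFFINITY);
		if (nvec < 0) {
			pdev->irq_affinity = NULL;
			kfree(node_cpus);
		}
		return nvec;
	}

After this patch the core no longer allocates or frees
dev->irq_affinity itself (note the removed kfree() calls below), so a
driver supplying a mask also owns its lifetime.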
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -549,15 +549,23 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
 	return ret;
 }
 
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+static struct msi_desc *
+msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
 {
-	u16 control;
+	struct cpumask *masks = NULL;
 	struct msi_desc *entry;
+	u16 control;
+
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
 
 	/* MSI Entry Initialization */
-	entry = alloc_msi_entry(&dev->dev, nvec, NULL);
+	entry = alloc_msi_entry(&dev->dev, nvec, masks);
 	if (!entry)
-		return NULL;
+		goto out;
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 
@@ -568,7 +576,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
-	entry->affinity			= dev->irq_affinity;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -579,6 +586,8 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	if (entry->msi_attrib.maskbit)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
 
+out:
+	kfree(masks);
 	return entry;
 }
 
@@ -607,7 +616,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * an error, and a positive return value indicates the number of interrupts
  * which could have been allocated.
  */
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -615,7 +624,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 
 	pci_msi_set_enable(dev, 0);	/* Disable MSI during set up */
 
-	entry = msi_setup_entry(dev, nvec);
+	entry = msi_setup_entry(dev, nvec, affinity);
 	if (!entry)
 		return -ENOMEM;
 
@@ -678,28 +687,29 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 }
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
-			      struct msix_entry *entries, int nvec)
+			      struct msix_entry *entries, int nvec,
+			      bool affinity)
 {
-	const struct cpumask *mask = NULL;
+	struct cpumask *curmsk, *masks = NULL;
 	struct msi_desc *entry;
-	int cpu = -1, i;
+	int ret, i;
 
-	for (i = 0; i < nvec; i++) {
-		if (dev->irq_affinity) {
-			cpu = cpumask_next(cpu, dev->irq_affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(dev->irq_affinity);
-			mask = cpumask_of(cpu);
-		}
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
 
-		entry = alloc_msi_entry(&dev->dev, 1, NULL);
+	for (i = 0, curmsk = masks; i < nvec; i++) {
+		entry = alloc_msi_entry(&dev->dev, 1, curmsk);
 		if (!entry) {
 			if (!i)
 				iounmap(base);
 			else
 				free_msi_irqs(dev);
 			/* No enough memory. Don't try again */
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 
 		entry->msi_attrib.is_msix	= 1;
@@ -710,11 +720,14 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			entry->msi_attrib.entry_nr = i;
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
-		entry->affinity			= mask;
 
 		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+		if (masks)
+			curmsk++;
 	}
-
+	ret = 0;
+out:
+	kfree(masks);
 	return 0;
 }
 
@@ -743,8 +756,8 @@ static void msix_program_entries(struct pci_dev *dev,
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev,
-				struct msix_entry *entries, int nvec)
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+				int nvec, bool affinity)
 {
 	int ret;
 	u16 control;
@@ -759,7 +772,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	if (!base)
 		return -ENOMEM;
 
-	ret = msix_setup_entries(dev, base, entries, nvec);
+	ret = msix_setup_entries(dev, base, entries, nvec, affinity);
 	if (ret)
 		return ret;
 
@@ -939,22 +952,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_msix_vec_count);
 
-/**
- * pci_enable_msix - configure device's MSI-X capability structure
- * @dev: pointer to the pci_dev data structure of MSI-X device function
- * @entries: pointer to an array of MSI-X entries (optional)
- * @nvec: number of MSI-X irqs requested for allocation by device driver
- *
- * Setup the MSI-X capability structure of device function with the number
- * of requested irqs upon its software driver call to request for
- * MSI-X mode enabled on its hardware device function. A return of zero
- * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
- **/
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			     int nvec, bool affinity)
 {
 	int nr_entries;
 	int i, j;
@@ -986,7 +985,27 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
 		return -EINVAL;
 	}
-	return msix_capability_init(dev, entries, nvec);
+	return msix_capability_init(dev, entries, nvec, affinity);
+}
+
+/**
+ * pci_enable_msix - configure device's MSI-X capability structure
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of MSI-X entries (optional)
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
+ *
+ * Setup the MSI-X capability structure of device function with the number
+ * of requested irqs upon its software driver call to request for
+ * MSI-X mode enabled on its hardware device function. A return of zero
+ * indicates the successful configuration of MSI-X capability structure
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+ * Or a return of > 0 indicates that driver request is exceeding the number
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
+ **/
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+	return __pci_enable_msix(dev, entries, nvec, false);
 }
 EXPORT_SYMBOL(pci_enable_msix);
 
@@ -1039,6 +1058,7 @@ EXPORT_SYMBOL(pci_msi_enabled);
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 		unsigned int flags)
 {
+	bool affinity = flags & PCI_IRQ_AFFINITY;
 	int nvec;
 	int rc;
 
@@ -1067,19 +1087,17 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 		nvec = maxvec;
 
 	for (;;) {
-		if (flags & PCI_IRQ_AFFINITY) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = msi_capability_init(dev, nvec);
+		rc = msi_capability_init(dev, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
@@ -1111,26 +1129,24 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 		struct msix_entry *entries, int minvec, int maxvec,
 		unsigned int flags)
 {
-	int nvec = maxvec;
-	int rc;
+	bool affinity = flags & PCI_IRQ_AFFINITY;
+	int rc, nvec = maxvec;
 
 	if (maxvec < minvec)
 		return -ERANGE;
 
 	for (;;) {
-		if (flags & PCI_IRQ_AFFINITY) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = pci_enable_msix(dev, entries, nvec);
+		rc = __pci_enable_msix(dev, entries, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
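The second file is the genirq side: alloc_descs() now treats @affinity
as an array of cnt masks, one per descriptor, validating each, where it
previously round-robined a single mask by hand. A sketch of the
resulting calling convention, assuming the __irq_alloc_descs()
signature of this kernel series (the function name and vector count are
invented; irq_create_affinity_masks() is the same helper the MSI code
above uses):

	#include <linux/interrupt.h>
	#include <linux/irq.h>
	#include <linux/module.h>
	#include <linux/numa.h>
	#include <linux/slab.h>

	/*
	 * Illustrative only: allocate four descriptors with one spread
	 * mask each. A NULL first argument makes
	 * irq_create_affinity_masks() spread over cpu_online_mask, as
	 * the changelog states. alloc_desc() copies each mask into the
	 * descriptor, so the array is freed here, just as
	 * msi_setup_entry() above frees its masks.
	 */
	static int example_alloc_spread_irqs(void)
	{
		struct cpumask *masks;
		int irq;

		masks = irq_create_affinity_masks(NULL, 4);
		irq = __irq_alloc_descs(-1, 0, 4, NUMA_NO_NODE,
					THIS_MODULE, masks);
		kfree(masks);
		return irq;	/* first irq number, or negative errno */
	}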
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -236,25 +236,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
 	const struct cpumask *mask = NULL;
 	struct irq_desc *desc;
 	unsigned int flags;
-	int i, cpu = -1;
+	int i;
 
-	if (affinity && cpumask_empty(affinity))
-		return -EINVAL;
+	/* Validate affinity mask(s) */
+	if (affinity) {
+		for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+			if (cpumask_empty(mask))
+				return -EINVAL;
+		}
+	}
 
 	flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+	mask = NULL;
 
 	for (i = 0; i < cnt; i++) {
 		if (affinity) {
-			cpu = cpumask_next(cpu, affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(affinity);
-			node = cpu_to_node(cpu);
-
-			/*
-			 * For single allocations we use the caller provided
-			 * mask otherwise we use the mask of the target cpu
-			 */
-			mask = cnt == 1 ? affinity : cpumask_of(cpu);
+			node = cpu_to_node(cpumask_first(affinity));
+			mask = affinity;
+			affinity++;
 		}
 		desc = alloc_desc(start + i, node, flags, mask, owner);
 		if (!desc)
@@ -481,9 +480,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
  * @cnt:	Number of consecutive irqs to allocate.
  * @node:	Preferred node on which the irq descriptor should be allocated
  * @owner:	Owning module (can be NULL)
- * @affinity:	Optional pointer to an affinity mask which hints where the
- *		irq descriptors should be allocated and which default
- *		affinities to use
+ * @affinity:	Optional pointer to an affinity mask array of size @cnt which
+ *		hints where the irq descriptors should be allocated and which
+ *		default affinities to use
  *
  * Returns the first irq number or error code
  */