forked from mirrors/linux
		
	irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains
Enhance Intel interrupt remapping driver to support hierarchical irqdomains. Implement intel_ir_chip to support stacked irq_chip. Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com> Acked-by: Joerg Roedel <jroedel@suse.de> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: David Cohen <david.a.cohen@linux.intel.com> Cc: Sander Eikelenboom <linux@eikelenboom.it> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Dimitri Sivanich <sivanich@sgi.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: David Woodhouse <dwmw2@infradead.org> Link: http://lkml.kernel.org/r/1428905519-23704-11-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
		
							parent
							
								
									8dedf4cf5a
								
							
						
					
					
						commit
						b106ee63ab
					
				
					 2 changed files with 333 additions and 8 deletions
				
			
		|  | @ -8,6 +8,7 @@ | |||
| #include <linux/irq.h> | ||||
| #include <linux/intel-iommu.h> | ||||
| #include <linux/acpi.h> | ||||
| #include <linux/irqdomain.h> | ||||
| #include <asm/io_apic.h> | ||||
| #include <asm/smp.h> | ||||
| #include <asm/cpu.h> | ||||
|  | @ -31,6 +32,14 @@ struct hpet_scope { | |||
| 	unsigned int devfn; | ||||
| }; | ||||
| 
 | ||||
| struct intel_ir_data { | ||||
| 	struct irq_2_iommu			irq_2_iommu; | ||||
| 	struct irte				irte_entry; | ||||
| 	union { | ||||
| 		struct msi_msg			msi_entry; | ||||
| 	}; | ||||
| }; | ||||
| 
 | ||||
| #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) | ||||
| #define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8) | ||||
| 
 | ||||
|  | @ -50,6 +59,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; | |||
|  * the dmar_global_lock. | ||||
|  */ | ||||
| static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); | ||||
| static struct irq_domain_ops intel_ir_domain_ops; | ||||
| 
 | ||||
| static int __init parse_ioapics_under_ir(void); | ||||
| 
 | ||||
|  | @ -263,7 +273,7 @@ static int free_irte(int irq) | |||
| 	unsigned long flags; | ||||
| 	int rc; | ||||
| 
 | ||||
| 	if (!irq_iommu) | ||||
| 	if (!irq_iommu || irq_iommu->iommu == NULL) | ||||
| 		return -1; | ||||
| 
 | ||||
| 	raw_spin_lock_irqsave(&irq_2_ir_lock, flags); | ||||
|  | @ -488,7 +498,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) | |||
| 
 | ||||
| 	pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, | ||||
| 				 INTR_REMAP_PAGE_ORDER); | ||||
| 
 | ||||
| 	if (!pages) { | ||||
| 		pr_err("IR%d: failed to allocate pages of order %d\n", | ||||
| 		       iommu->seq_id, INTR_REMAP_PAGE_ORDER); | ||||
|  | @ -502,11 +511,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) | |||
| 		goto out_free_pages; | ||||
| 	} | ||||
| 
 | ||||
| 	iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(), | ||||
| 						    0, INTR_REMAP_TABLE_ENTRIES, | ||||
| 						    NULL, &intel_ir_domain_ops, | ||||
| 						    iommu); | ||||
| 	if (!iommu->ir_domain) { | ||||
| 		pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); | ||||
| 		goto out_free_bitmap; | ||||
| 	} | ||||
| 	iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain); | ||||
| 
 | ||||
| 	ir_table->base = page_address(pages); | ||||
| 	ir_table->bitmap = bitmap; | ||||
| 	iommu->ir_table = ir_table; | ||||
| 	return 0; | ||||
| 
 | ||||
| out_free_bitmap: | ||||
| 	kfree(bitmap); | ||||
| out_free_pages: | ||||
| 	__free_pages(pages, INTR_REMAP_PAGE_ORDER); | ||||
| out_free_table: | ||||
|  | @ -517,6 +538,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) | |||
| static void intel_teardown_irq_remapping(struct intel_iommu *iommu) | ||||
| { | ||||
| 	if (iommu && iommu->ir_table) { | ||||
| 		if (iommu->ir_msi_domain) { | ||||
| 			irq_domain_remove(iommu->ir_msi_domain); | ||||
| 			iommu->ir_msi_domain = NULL; | ||||
| 		} | ||||
| 		if (iommu->ir_domain) { | ||||
| 			irq_domain_remove(iommu->ir_domain); | ||||
| 			iommu->ir_domain = NULL; | ||||
| 		} | ||||
| 		free_pages((unsigned long)iommu->ir_table->base, | ||||
| 			   INTR_REMAP_PAGE_ORDER); | ||||
| 		kfree(iommu->ir_table->bitmap); | ||||
|  | @ -1062,12 +1091,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
| 	struct irte irte; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (!config_enabled(CONFIG_SMP)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (!cpumask_intersects(mask, cpu_online_mask)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (get_irte(irq, &irte)) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
|  | @ -1100,6 +1123,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
| 		send_cleanup_vector(cfg); | ||||
| 
 | ||||
| 	cpumask_copy(data->affinity, mask); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | @ -1205,6 +1229,53 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id) | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info) | ||||
| { | ||||
| 	struct intel_iommu *iommu = NULL; | ||||
| 
 | ||||
| 	if (!info) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	switch (info->type) { | ||||
| 	case X86_IRQ_ALLOC_TYPE_IOAPIC: | ||||
| 		iommu = map_ioapic_to_ir(info->ioapic_id); | ||||
| 		break; | ||||
| 	case X86_IRQ_ALLOC_TYPE_HPET: | ||||
| 		iommu = map_hpet_to_ir(info->hpet_id); | ||||
| 		break; | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSI: | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSIX: | ||||
| 		iommu = map_dev_to_ir(info->msi_dev); | ||||
| 		break; | ||||
| 	default: | ||||
| 		BUG_ON(1); | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	return iommu ? iommu->ir_domain : NULL; | ||||
| } | ||||
| 
 | ||||
| static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info) | ||||
| { | ||||
| 	struct intel_iommu *iommu; | ||||
| 
 | ||||
| 	if (!info) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	switch (info->type) { | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSI: | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSIX: | ||||
| 		iommu = map_dev_to_ir(info->msi_dev); | ||||
| 		if (iommu) | ||||
| 			return iommu->ir_msi_domain; | ||||
| 		break; | ||||
| 	default: | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| struct irq_remap_ops intel_irq_remap_ops = { | ||||
| 	.prepare		= intel_prepare_irq_remapping, | ||||
| 	.enable			= intel_enable_irq_remapping, | ||||
|  | @ -1218,6 +1289,256 @@ struct irq_remap_ops intel_irq_remap_ops = { | |||
| 	.msi_alloc_irq		= intel_msi_alloc_irq, | ||||
| 	.msi_setup_irq		= intel_msi_setup_irq, | ||||
| 	.alloc_hpet_msi		= intel_alloc_hpet_msi, | ||||
| 	.get_ir_irq_domain	= intel_get_ir_irq_domain, | ||||
| 	.get_irq_domain		= intel_get_irq_domain, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Migrate the IO-APIC irq in the presence of intr-remapping. | ||||
|  * | ||||
|  * For both level and edge triggered, irq migration is a simple atomic | ||||
|  * update(of vector and cpu destination) of IRTE and flush the hardware cache. | ||||
|  * | ||||
|  * For level triggered, we eliminate the io-apic RTE modification (with the | ||||
|  * updated vector information), by using a virtual vector (io-apic pin number). | ||||
|  * Real vector that is used for interrupting cpu will be coming from | ||||
|  * the interrupt-remapping table entry. | ||||
|  * | ||||
|  * As the migration is a simple atomic update of IRTE, the same mechanism | ||||
|  * is used to migrate MSI irq's in the presence of interrupt-remapping. | ||||
|  */ | ||||
| static int | ||||
| intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||||
| 		      bool force) | ||||
| { | ||||
| 	struct intel_ir_data *ir_data = data->chip_data; | ||||
| 	struct irte *irte = &ir_data->irte_entry; | ||||
| 	struct irq_cfg *cfg = irqd_cfg(data); | ||||
| 	struct irq_data *parent = data->parent_data; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	ret = parent->chip->irq_set_affinity(parent, mask, force); | ||||
| 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Atomically updates the IRTE with the new destination, vector | ||||
| 	 * and flushes the interrupt entry cache. | ||||
| 	 */ | ||||
| 	irte->vector = cfg->vector; | ||||
| 	irte->dest_id = IRTE_DEST(cfg->dest_apicid); | ||||
| 	modify_irte(&ir_data->irq_2_iommu, irte); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * After this point, all the interrupts will start arriving | ||||
| 	 * at the new destination. So, time to cleanup the previous | ||||
| 	 * vector allocation. | ||||
| 	 */ | ||||
| 	if (cfg->move_in_progress) | ||||
| 		send_cleanup_vector(cfg); | ||||
| 
 | ||||
| 	return IRQ_SET_MASK_OK_DONE; | ||||
| } | ||||
| 
 | ||||
| static void intel_ir_compose_msi_msg(struct irq_data *irq_data, | ||||
| 				     struct msi_msg *msg) | ||||
| { | ||||
| 	struct intel_ir_data *ir_data = irq_data->chip_data; | ||||
| 
 | ||||
| 	*msg = ir_data->msi_entry; | ||||
| } | ||||
| 
 | ||||
| static struct irq_chip intel_ir_chip = { | ||||
| 	.irq_ack = ir_ack_apic_edge, | ||||
| 	.irq_set_affinity = intel_ir_set_affinity, | ||||
| 	.irq_compose_msi_msg = intel_ir_compose_msi_msg, | ||||
| }; | ||||
| 
 | ||||
| static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data, | ||||
| 					     struct irq_cfg *irq_cfg, | ||||
| 					     struct irq_alloc_info *info, | ||||
| 					     int index, int sub_handle) | ||||
| { | ||||
| 	struct IR_IO_APIC_route_entry *entry; | ||||
| 	struct irte *irte = &data->irte_entry; | ||||
| 	struct msi_msg *msg = &data->msi_entry; | ||||
| 
 | ||||
| 	prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid); | ||||
| 	switch (info->type) { | ||||
| 	case X86_IRQ_ALLOC_TYPE_IOAPIC: | ||||
| 		/* Set source-id of interrupt request */ | ||||
| 		set_ioapic_sid(irte, info->ioapic_id); | ||||
| 		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n", | ||||
| 			info->ioapic_id, irte->present, irte->fpd, | ||||
| 			irte->dst_mode, irte->redir_hint, | ||||
| 			irte->trigger_mode, irte->dlvry_mode, | ||||
| 			irte->avail, irte->vector, irte->dest_id, | ||||
| 			irte->sid, irte->sq, irte->svt); | ||||
| 
 | ||||
| 		entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry; | ||||
| 		info->ioapic_entry = NULL; | ||||
| 		memset(entry, 0, sizeof(*entry)); | ||||
| 		entry->index2	= (index >> 15) & 0x1; | ||||
| 		entry->zero	= 0; | ||||
| 		entry->format	= 1; | ||||
| 		entry->index	= (index & 0x7fff); | ||||
| 		/*
 | ||||
| 		 * IO-APIC RTE will be configured with virtual vector. | ||||
| 		 * irq handler will do the explicit EOI to the io-apic. | ||||
| 		 */ | ||||
| 		entry->vector	= info->ioapic_pin; | ||||
| 		entry->mask	= 0;			/* enable IRQ */ | ||||
| 		entry->trigger	= info->ioapic_trigger; | ||||
| 		entry->polarity	= info->ioapic_polarity; | ||||
| 		if (info->ioapic_trigger) | ||||
| 			entry->mask = 1; /* Mask level triggered irqs. */ | ||||
| 		break; | ||||
| 
 | ||||
| 	case X86_IRQ_ALLOC_TYPE_HPET: | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSI: | ||||
| 	case X86_IRQ_ALLOC_TYPE_MSIX: | ||||
| 		if (info->type == X86_IRQ_ALLOC_TYPE_HPET) | ||||
| 			set_hpet_sid(irte, info->hpet_id); | ||||
| 		else | ||||
| 			set_msi_sid(irte, info->msi_dev); | ||||
| 
 | ||||
| 		msg->address_hi = MSI_ADDR_BASE_HI; | ||||
| 		msg->data = sub_handle; | ||||
| 		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | | ||||
| 				  MSI_ADDR_IR_SHV | | ||||
| 				  MSI_ADDR_IR_INDEX1(index) | | ||||
| 				  MSI_ADDR_IR_INDEX2(index); | ||||
| 		break; | ||||
| 
 | ||||
| 	default: | ||||
| 		BUG_ON(1); | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void intel_free_irq_resources(struct irq_domain *domain, | ||||
| 				     unsigned int virq, unsigned int nr_irqs) | ||||
| { | ||||
| 	struct irq_data *irq_data; | ||||
| 	struct intel_ir_data *data; | ||||
| 	struct irq_2_iommu *irq_iommu; | ||||
| 	unsigned long flags; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < nr_irqs; i++) { | ||||
| 		irq_data = irq_domain_get_irq_data(domain, virq  + i); | ||||
| 		if (irq_data && irq_data->chip_data) { | ||||
| 			data = irq_data->chip_data; | ||||
| 			irq_iommu = &data->irq_2_iommu; | ||||
| 			raw_spin_lock_irqsave(&irq_2_ir_lock, flags); | ||||
| 			clear_entries(irq_iommu); | ||||
| 			raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); | ||||
| 			irq_domain_reset_irq_data(irq_data); | ||||
| 			kfree(data); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int intel_irq_remapping_alloc(struct irq_domain *domain, | ||||
| 				     unsigned int virq, unsigned int nr_irqs, | ||||
| 				     void *arg) | ||||
| { | ||||
| 	struct intel_iommu *iommu = domain->host_data; | ||||
| 	struct irq_alloc_info *info = arg; | ||||
| 	struct intel_ir_data *data; | ||||
| 	struct irq_data *irq_data; | ||||
| 	struct irq_cfg *irq_cfg; | ||||
| 	int i, ret, index; | ||||
| 
 | ||||
| 	if (!info || !iommu) | ||||
| 		return -EINVAL; | ||||
| 	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI && | ||||
| 	    info->type != X86_IRQ_ALLOC_TYPE_MSIX) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * With IRQ remapping enabled, don't need contiguous CPU vectors | ||||
| 	 * to support multiple MSI interrupts. | ||||
| 	 */ | ||||
| 	if (info->type == X86_IRQ_ALLOC_TYPE_MSI) | ||||
| 		info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; | ||||
| 
 | ||||
| 	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); | ||||
| 	if (ret < 0) | ||||
| 		return ret; | ||||
| 
 | ||||
| 	ret = -ENOMEM; | ||||
| 	data = kzalloc(sizeof(*data), GFP_KERNEL); | ||||
| 	if (!data) | ||||
| 		goto out_free_parent; | ||||
| 
 | ||||
| 	down_read(&dmar_global_lock); | ||||
| 	index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs); | ||||
| 	up_read(&dmar_global_lock); | ||||
| 	if (index < 0) { | ||||
| 		pr_warn("Failed to allocate IRTE\n"); | ||||
| 		kfree(data); | ||||
| 		goto out_free_parent; | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0; i < nr_irqs; i++) { | ||||
| 		irq_data = irq_domain_get_irq_data(domain, virq + i); | ||||
| 		irq_cfg = irqd_cfg(irq_data); | ||||
| 		if (!irq_data || !irq_cfg) { | ||||
| 			ret = -EINVAL; | ||||
| 			goto out_free_data; | ||||
| 		} | ||||
| 
 | ||||
| 		if (i > 0) { | ||||
| 			data = kzalloc(sizeof(*data), GFP_KERNEL); | ||||
| 			if (!data) | ||||
| 				goto out_free_data; | ||||
| 		} | ||||
| 		irq_data->hwirq = (index << 16) + i; | ||||
| 		irq_data->chip_data = data; | ||||
| 		irq_data->chip = &intel_ir_chip; | ||||
| 		intel_irq_remapping_prepare_irte(data, irq_cfg, info, index, i); | ||||
| 		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); | ||||
| 	} | ||||
| 	return 0; | ||||
| 
 | ||||
| out_free_data: | ||||
| 	intel_free_irq_resources(domain, virq, i); | ||||
| out_free_parent: | ||||
| 	irq_domain_free_irqs_common(domain, virq, nr_irqs); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
/*
 * irq_domain_ops.free callback: drop the remapping resources of the range
 * first, then let the common helper free the irqs themselves.
 */
static void intel_irq_remapping_free(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs)
{
	/* Must run first: it still needs the chip_data of each irq_data. */
	intel_free_irq_resources(domain, virq, nr_irqs);

	irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
| 
 | ||||
| static void intel_irq_remapping_activate(struct irq_domain *domain, | ||||
| 					 struct irq_data *irq_data) | ||||
| { | ||||
| 	struct intel_ir_data *data = irq_data->chip_data; | ||||
| 
 | ||||
| 	modify_irte(&data->irq_2_iommu, &data->irte_entry); | ||||
| } | ||||
| 
 | ||||
| static void intel_irq_remapping_deactivate(struct irq_domain *domain, | ||||
| 					   struct irq_data *irq_data) | ||||
| { | ||||
| 	struct intel_ir_data *data = irq_data->chip_data; | ||||
| 	struct irte entry; | ||||
| 
 | ||||
| 	memset(&entry, 0, sizeof(entry)); | ||||
| 	modify_irte(&data->irq_2_iommu, &entry); | ||||
| } | ||||
| 
 | ||||
| static struct irq_domain_ops intel_ir_domain_ops = { | ||||
| 	.alloc = intel_irq_remapping_alloc, | ||||
| 	.free = intel_irq_remapping_free, | ||||
| 	.activate = intel_irq_remapping_activate, | ||||
| 	.deactivate = intel_irq_remapping_deactivate, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  |  | |||
|  | @ -286,6 +286,8 @@ struct q_inval { | |||
| 
 | ||||
| #define INTR_REMAP_TABLE_ENTRIES	65536 | ||||
| 
 | ||||
| struct irq_domain; | ||||
| 
 | ||||
| struct ir_table { | ||||
| 	struct irte *base; | ||||
| 	unsigned long *bitmap; | ||||
|  | @ -335,6 +337,8 @@ struct intel_iommu { | |||
| 
 | ||||
| #ifdef CONFIG_IRQ_REMAP | ||||
| 	struct ir_table *ir_table;	/* Interrupt remapping info */ | ||||
| 	struct irq_domain *ir_domain; | ||||
| 	struct irq_domain *ir_msi_domain; | ||||
| #endif | ||||
| 	struct device	*iommu_dev; /* IOMMU-sysfs device */ | ||||
| 	int		node; | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jiang Liu
						Jiang Liu