	genirq: Introduce common irq_force_complete_move() implementation
CONFIG_GENERIC_PENDING_IRQ requires an architecture-specific implementation
of irq_force_complete_move() for CPU hotplug. At the moment, only x86
implements this unconditionally, but for RISC-V irq_force_complete_move()
is only needed when the RISC-V IMSIC driver is in use and not needed
otherwise.

To allow runtime configuration of this mechanism, introduce a common
irq_force_complete_move() implementation in the interrupt core code,
which only invokes the completion function when an interrupt chip in the
hierarchy implements it.

Switch x86 over to the new mechanism.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com
commit 751dc837da
parent fe35ecee8e

4 changed files with 123 additions and 125 deletions
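The driver-facing side of the change is small: an interrupt chip anywhere in the hierarchy opts in by populating the new callback, and chips that leave it NULL are simply skipped. Below is a minimal sketch of such an opt-in for a hypothetical hierarchical chip (the chip name and the body of the completion helper are illustrative; only the .irq_force_complete_move member is introduced by this commit):

#include <linux/irq.h>

/* Hypothetical cleanup of a half-finished interrupt move for this chip. */
static void demo_chip_force_complete_move(struct irq_data *d)
{
	/*
	 * Release whatever per-CPU resources the previous affinity
	 * setting still holds, analogous to what the x86 vector domain
	 * does in apic_force_complete_move() below.
	 */
}

static struct irq_chip demo_chip = {
	.name				= "DEMO",
	.irq_mask			= irq_chip_mask_parent,
	.irq_unmask			= irq_chip_unmask_parent,
	.irq_set_affinity		= irq_chip_set_affinity_parent,
	/* New optional callback added by this commit: */
	.irq_force_complete_move	= demo_chip_force_complete_move,
};

This is also how the RISC-V IMSIC case becomes a runtime decision: only the IMSIC irq_chip would set the callback, and on systems without it the core helper finds nothing to invoke.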
				
			
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+	unsigned int vector = apicd->prev_vector;
+	unsigned int cpu = apicd->prev_cpu;
+	bool managed = apicd->is_managed;
+
+	/*
+	 * Managed interrupts are usually not migrated away
+	 * from an online CPU, but CPU isolation 'managed_irq'
+	 * can make that happen.
+	 * 1) Activation does not take the isolation into account
+	 *    to keep the code simple
+	 * 2) Migration away from an isolated CPU can happen when
+	 *    a non-isolated CPU which is in the calculated
+	 *    affinity mask comes online.
+	 */
+	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+	irq_matrix_free(vector_matrix, cpu, vector, managed);
+	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+	hlist_del_init(&apicd->clist);
+	apicd->prev_vector = 0;
+	apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+	unsigned int cpu = smp_processor_id();
+	struct apic_chip_data *apicd;
+	unsigned int vector;
+
+	guard(raw_spinlock)(&vector_lock);
+	apicd = apic_chip_data(irqd);
+	if (!apicd)
+		return;
+
+	/*
+	 * If prev_vector is empty or the descriptor is neither currently
+	 * nor previously on the outgoing CPU no action required.
+	 */
+	vector = apicd->prev_vector;
+	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+		return;
+
+	/*
+	 * This is tricky. If the cleanup of the old vector has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
+	 *
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (apicd->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *			      is effective, i.e. it's raised on
+		 *			      the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendezvous in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theoretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqd->irq, vector);
+	}
+	free_moved_vector(apicd);
+}
+
 #else
 # define apic_set_affinity		NULL
+# define apic_force_complete_move	NULL
 #endif
 
 static int apic_retrigger_irq(struct irq_data *irqd)
@@ -927,35 +1028,12 @@ static struct irq_chip lapic_controller = {
 	.irq_ack			= apic_ack_edge,
 	.irq_set_affinity		= apic_set_affinity,
 	.irq_compose_msi_msg		= x86_vector_msi_compose_msg,
+	.irq_force_complete_move	= apic_force_complete_move,
 	.irq_retrigger			= apic_retrigger_irq,
 };
 
 #ifdef CONFIG_SMP
 
-static void free_moved_vector(struct apic_chip_data *apicd)
-{
-	unsigned int vector = apicd->prev_vector;
-	unsigned int cpu = apicd->prev_cpu;
-	bool managed = apicd->is_managed;
-
-	/*
-	 * Managed interrupts are usually not migrated away
-	 * from an online CPU, but CPU isolation 'managed_irq'
-	 * can make that happen.
-	 * 1) Activation does not take the isolation into account
-	 *    to keep the code simple
-	 * 2) Migration away from an isolated CPU can happen when
-	 *    a non-isolated CPU which is in the calculated
-	 *    affinity mask comes online.
-	 */
-	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-	irq_matrix_free(vector_matrix, cpu, vector, managed);
-	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-	hlist_del_init(&apicd->clist);
-	apicd->prev_vector = 0;
-	apicd->move_in_progress = 0;
-}
-
 static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 {
 	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
 		__vector_schedule_cleanup(apicd);
 }
 
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
-	unsigned int cpu = smp_processor_id();
-	struct apic_chip_data *apicd;
-	struct irq_data *irqd;
-	unsigned int vector;
-
-	/*
-	 * The function is called for all descriptors regardless of which
-	 * irqdomain they belong to. For example if an IRQ is provided by
-	 * an irq_chip as part of a GPIO driver, the chip data for that
-	 * descriptor is specific to the irq_chip in question.
-	 *
-	 * Check first that the chip_data is what we expect
-	 * (apic_chip_data) before touching it any further.
-	 */
-	irqd = irq_domain_get_irq_data(x86_vector_domain,
-				       irq_desc_get_irq(desc));
-	if (!irqd)
-		return;
-
-	raw_spin_lock(&vector_lock);
-	apicd = apic_chip_data(irqd);
-	if (!apicd)
-		goto unlock;
-
-	/*
-	 * If prev_vector is empty or the descriptor is neither currently
-	 * nor previously on the outgoing CPU no action required.
-	 */
-	vector = apicd->prev_vector;
-	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
-		goto unlock;
-
-	/*
-	 * This is tricky. If the cleanup of the old vector has not been
-	 * done yet, then the following setaffinity call will fail with
-	 * -EBUSY. This can leave the interrupt in a stale state.
-	 *
-	 * All CPUs are stuck in stop machine with interrupts disabled so
-	 * calling __irq_complete_move() would be completely pointless.
-	 *
-	 * 1) The interrupt is in move_in_progress state. That means that we
-	 *    have not seen an interrupt since the io_apic was reprogrammed to
-	 *    the new vector.
-	 *
-	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
-	 *    have not been processed yet.
-	 */
-	if (apicd->move_in_progress) {
-		/*
-		 * In theory there is a race:
-		 *
-		 * set_ioapic(new_vector) <-- Interrupt is raised before update
-		 *			      is effective, i.e. it's raised on
-		 *			      the old vector.
-		 *
-		 * So if the target cpu cannot handle that interrupt before
-		 * the old vector is cleaned up, we get a spurious interrupt
-		 * and in the worst case the ioapic irq line becomes stale.
-		 *
-		 * But in case of cpu hotplug this should be a non issue
-		 * because if the affinity update happens right before all
-		 * cpus rendezvous in stop machine, there is no way that the
-		 * interrupt can be blocked on the target cpu because all cpus
-		 * loops first with interrupts enabled in stop machine, so the
-		 * old vector is not yet cleaned up when the interrupt fires.
-		 *
-		 * So the only way to run into this issue is if the delivery
-		 * of the interrupt on the apic/system bus would be delayed
-		 * beyond the point where the target cpu disables interrupts
-		 * in stop machine. I doubt that it can happen, but at least
-		 * there is a theoretical chance. Virtualization might be
-		 * able to expose this, but AFAICT the IOAPIC emulation is not
-		 * as stupid as the real hardware.
-		 *
-		 * Anyway, there is nothing we can do about that at this point
-		 * w/o refactoring the whole fixup_irq() business completely.
-		 * We print at least the irq number and the old vector number,
-		 * so we have the necessary information when a problem in that
-		 * area arises.
-		 */
-		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
-			irqd->irq, vector);
-	}
-	free_moved_vector(apicd);
-unlock:
-	raw_spin_unlock(&vector_lock);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Note, this is not accurate accounting, but at least good enough to
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @irq_nmi_setup:	function called from core code before enabling an NMI
  * @irq_nmi_teardown:	function called from core code after disabling an NMI
+ * @irq_force_complete_move:	optional function to force complete pending irq move
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
 	int		(*irq_nmi_setup)(struct irq_data *data);
 	void		(*irq_nmi_teardown)(struct irq_data *data);
 
+	void		(*irq_force_complete_move)(struct irq_data *data);
+
 	unsigned long	flags;
 };
 
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
 		__irq_move_irq(data);
 }
 void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif
 
 extern int no_irq_affinity;
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 	return desc->pending_mask;
 }
 bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 {
 	return false;
 }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
 	return true;
 }
 
+void irq_force_complete_move(struct irq_desc *desc)
+{
+	for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+		if (d->chip && d->chip->irq_force_complete_move) {
+			d->chip->irq_force_complete_move(d);
+			return;
+		}
+	}
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);
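The call-site shape is unchanged by this patch: the CPU-hotplug fixup path (on x86, fixup_irqs()) invokes the helper per descriptor with desc->lock held and interrupts disabled, exactly as the comment in vector.c requires. A rough sketch of that shape, with the iteration and hotplug details of the real fixup path omitted (the helper name here is hypothetical, and after this patch irq_force_complete_move() is declared core-internal in kernel/irq/internals.h):

/* Sketch only; simplified from the shape of x86's fixup_irqs(). */
static void force_complete_one(struct irq_desc *desc)
{
	/* Interrupts are already disabled on the outgoing CPU here. */
	raw_spin_lock(&desc->lock);
	/*
	 * The common helper walks the irq_data hierarchy and invokes the
	 * first chip implementing .irq_force_complete_move, then stops,
	 * so at most one domain level completes the pending move.
	 */
	irq_force_complete_move(desc);
	raw_spin_unlock(&desc->lock);
}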