	genirq: Introduce common irq_force_complete_move() implementation
CONFIG_GENERIC_PENDING_IRQ requires an architecture-specific implementation of irq_force_complete_move() for CPU hotplug. At the moment, only x86 implements this unconditionally, but for RISC-V irq_force_complete_move() is only needed when the RISC-V IMSIC driver is in use, and not otherwise.

To allow runtime configuration of this mechanism, introduce a common irq_force_complete_move() implementation in the interrupt core code, which invokes the completion function only when an interrupt chip in the hierarchy implements it.

Switch x86 over to the new mechanism. No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com
This commit is contained in:

parent fe35ecee8e
commit 751dc837da

4 changed files with 123 additions and 125 deletions
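The key property of the new scheme is that participation is opt-in per interrupt chip: the core walks the domain hierarchy and only calls into a chip that actually fills in the new callback, so architectures and drivers that never move interrupts pay nothing. As a minimal, hypothetical sketch (none of the example_* names below exist in this commit; a real driver such as the RISC-V IMSIC one mentioned above would supply hardware-specific bodies), an irq_chip opts in like this:

#include <linux/irq.h>

/* Hypothetical chip callbacks; a real driver programs its hardware here. */
static void example_mask(struct irq_data *d) { }
static void example_unmask(struct irq_data *d) { }

/*
 * Invoked by the core's irq_force_complete_move() during CPU hot-unplug
 * when this chip is the first one in the hierarchy implementing the
 * callback. A real driver would clean up any half-finished move of @d
 * here; chips which leave the pointer NULL are simply skipped.
 */
static void example_force_complete_move(struct irq_data *d)
{
}

static struct irq_chip example_chip = {
	.name				= "EXAMPLE",
	.irq_mask			= example_mask,
	.irq_unmask			= example_unmask,
	.irq_force_complete_move	= example_force_complete_move,
};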
				
			
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+	unsigned int vector = apicd->prev_vector;
+	unsigned int cpu = apicd->prev_cpu;
+	bool managed = apicd->is_managed;
+
+	/*
+	 * Managed interrupts are usually not migrated away
+	 * from an online CPU, but CPU isolation 'managed_irq'
+	 * can make that happen.
+	 * 1) Activation does not take the isolation into account
+	 *    to keep the code simple
+	 * 2) Migration away from an isolated CPU can happen when
+	 *    a non-isolated CPU which is in the calculated
+	 *    affinity mask comes online.
+	 */
+	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+	irq_matrix_free(vector_matrix, cpu, vector, managed);
+	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+	hlist_del_init(&apicd->clist);
+	apicd->prev_vector = 0;
+	apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+	unsigned int cpu = smp_processor_id();
+	struct apic_chip_data *apicd;
+	unsigned int vector;
+
+	guard(raw_spinlock)(&vector_lock);
+	apicd = apic_chip_data(irqd);
+	if (!apicd)
+		return;
+
+	/*
+	 * If prev_vector is empty or the descriptor is neither currently
+	 * nor previously on the outgoing CPU no action required.
+	 */
+	vector = apicd->prev_vector;
+	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+		return;
+
+	/*
+	 * This is tricky. If the cleanup of the old vector has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
+	 *
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (apicd->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *			      is effective, i.e. it's raised on
+		 *			      the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendezvous in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theoretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqd->irq, vector);
+	}
+	free_moved_vector(apicd);
+}
+
 #else
-# define apic_set_affinity	NULL
+# define apic_set_affinity		NULL
+# define apic_force_complete_move	NULL
 #endif
 
 static int apic_retrigger_irq(struct irq_data *irqd)
@@ -923,39 +1024,16 @@ static void x86_vector_msi_compose_msg(struct irq_data *data,
 }
 
 static struct irq_chip lapic_controller = {
-	.name			= "APIC",
-	.irq_ack		= apic_ack_edge,
-	.irq_set_affinity	= apic_set_affinity,
-	.irq_compose_msi_msg	= x86_vector_msi_compose_msg,
-	.irq_retrigger		= apic_retrigger_irq,
+	.name				= "APIC",
+	.irq_ack			= apic_ack_edge,
+	.irq_set_affinity		= apic_set_affinity,
+	.irq_compose_msi_msg		= x86_vector_msi_compose_msg,
+	.irq_force_complete_move	= apic_force_complete_move,
+	.irq_retrigger			= apic_retrigger_irq,
 };
 
 #ifdef CONFIG_SMP
 
-static void free_moved_vector(struct apic_chip_data *apicd)
-{
-	unsigned int vector = apicd->prev_vector;
-	unsigned int cpu = apicd->prev_cpu;
-	bool managed = apicd->is_managed;
-
-	/*
-	 * Managed interrupts are usually not migrated away
-	 * from an online CPU, but CPU isolation 'managed_irq'
-	 * can make that happen.
-	 * 1) Activation does not take the isolation into account
-	 *    to keep the code simple
-	 * 2) Migration away from an isolated CPU can happen when
-	 *    a non-isolated CPU which is in the calculated
-	 *    affinity mask comes online.
-	 */
-	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-	irq_matrix_free(vector_matrix, cpu, vector, managed);
-	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-	hlist_del_init(&apicd->clist);
-	apicd->prev_vector = 0;
-	apicd->move_in_progress = 0;
-}
-
 static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 {
 	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
 		__vector_schedule_cleanup(apicd);
 }
 
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
-	unsigned int cpu = smp_processor_id();
-	struct apic_chip_data *apicd;
-	struct irq_data *irqd;
-	unsigned int vector;
-
-	/*
-	 * The function is called for all descriptors regardless of which
-	 * irqdomain they belong to. For example if an IRQ is provided by
-	 * an irq_chip as part of a GPIO driver, the chip data for that
-	 * descriptor is specific to the irq_chip in question.
-	 *
-	 * Check first that the chip_data is what we expect
-	 * (apic_chip_data) before touching it any further.
-	 */
-	irqd = irq_domain_get_irq_data(x86_vector_domain,
-				       irq_desc_get_irq(desc));
-	if (!irqd)
-		return;
-
-	raw_spin_lock(&vector_lock);
-	apicd = apic_chip_data(irqd);
-	if (!apicd)
-		goto unlock;
-
-	/*
-	 * If prev_vector is empty or the descriptor is neither currently
-	 * nor previously on the outgoing CPU no action required.
-	 */
-	vector = apicd->prev_vector;
-	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
-		goto unlock;
-
-	/*
-	 * This is tricky. If the cleanup of the old vector has not been
-	 * done yet, then the following setaffinity call will fail with
-	 * -EBUSY. This can leave the interrupt in a stale state.
-	 *
-	 * All CPUs are stuck in stop machine with interrupts disabled so
-	 * calling __irq_complete_move() would be completely pointless.
-	 *
-	 * 1) The interrupt is in move_in_progress state. That means that we
-	 *    have not seen an interrupt since the io_apic was reprogrammed to
-	 *    the new vector.
-	 *
-	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
-	 *    have not been processed yet.
-	 */
-	if (apicd->move_in_progress) {
-		/*
-		 * In theory there is a race:
-		 *
-		 * set_ioapic(new_vector) <-- Interrupt is raised before update
-		 *			      is effective, i.e. it's raised on
-		 *			      the old vector.
-		 *
-		 * So if the target cpu cannot handle that interrupt before
-		 * the old vector is cleaned up, we get a spurious interrupt
-		 * and in the worst case the ioapic irq line becomes stale.
-		 *
-		 * But in case of cpu hotplug this should be a non issue
-		 * because if the affinity update happens right before all
-		 * cpus rendezvous in stop machine, there is no way that the
-		 * interrupt can be blocked on the target cpu because all cpus
-		 * loops first with interrupts enabled in stop machine, so the
-		 * old vector is not yet cleaned up when the interrupt fires.
-		 *
-		 * So the only way to run into this issue is if the delivery
-		 * of the interrupt on the apic/system bus would be delayed
-		 * beyond the point where the target cpu disables interrupts
-		 * in stop machine. I doubt that it can happen, but at least
-		 * there is a theoretical chance. Virtualization might be
-		 * able to expose this, but AFAICT the IOAPIC emulation is not
-		 * as stupid as the real hardware.
-		 *
-		 * Anyway, there is nothing we can do about that at this point
-		 * w/o refactoring the whole fixup_irq() business completely.
-		 * We print at least the irq number and the old vector number,
-		 * so we have the necessary information when a problem in that
-		 * area arises.
-		 */
-		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
-			irqd->irq, vector);
-	}
-	free_moved_vector(apicd);
-unlock:
-	raw_spin_unlock(&vector_lock);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Note, this is not accurate accounting, but at least good enough to
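One detail of the x86 conversion worth noting: the old irq_force_complete_move() took vector_lock with raw_spin_lock() and funneled every exit through a goto unlock label, whereas the new apic_force_complete_move() uses the scope-based guard(raw_spinlock)(...) helper from <linux/cleanup.h>, so the early exits become plain return statements and the lock is released automatically when the scope ends. A minimal sketch of that pattern, with hypothetical names:

#include <linux/cleanup.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);	/* hypothetical lock */
static unsigned int example_state;		/* hypothetical guarded state */

static void example_update(unsigned int value)
{
	/* Acquires example_lock; held until the enclosing scope is left. */
	guard(raw_spinlock)(&example_lock);

	if (!value)
		return;	/* lock dropped automatically, no goto needed */

	example_state = value;
}	/* lock dropped here on the normal path */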
diff --git a/include/linux/irq.h b/include/linux/irq.h
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @irq_nmi_setup:	function called from core code before enabling an NMI
  * @irq_nmi_teardown:	function called from core code after disabling an NMI
+ * @irq_force_complete_move:	optional function to force complete pending irq move
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
 	int		(*irq_nmi_setup)(struct irq_data *data);
 	void		(*irq_nmi_teardown)(struct irq_data *data);
 
+	void		(*irq_force_complete_move)(struct irq_data *data);
+
 	unsigned long	flags;
 };
 
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
 		__irq_move_irq(data);
 }
 void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif
 
 extern int no_irq_affinity;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
 	return desc->pending_mask;
 }
 bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
 #else /* CONFIG_GENERIC_PENDING_IRQ */
 static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 {
 	return false;
 }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
 	return true;
 }
 
+void irq_force_complete_move(struct irq_desc *desc)
+{
+	for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
+		if (d->chip && d->chip->irq_force_complete_move) {
+			d->chip->irq_force_complete_move(d);
+			return;
+		}
+	}
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);
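Note that the declaration of irq_force_complete_move() moved from include/linux/irq.h into kernel/irq/internals.h, so after this change only the interrupt core itself can call it. The new helper walks the hierarchy from the outermost irq_data through d->parent_data and stops at the first chip providing the callback, which is how a stacked domain setup (for example PCI/MSI on top of the x86 vector domain) still reaches apic_force_complete_move(). A hedged sketch of a caller, assuming, as the x86 comment above states, that desc->lock is held and interrupts are disabled; the function below is illustrative, not the kernel's actual fixup code:

#include <linux/irq.h>
#include "internals.h"	/* in-tree location of the new declaration */

/* Illustrative stand-in for the core's CPU-offline interrupt fixup. */
static void example_fixup_one_irq(struct irq_desc *desc)
{
	/*
	 * Force-complete any pending vector move before retargeting.
	 * This is a no-op when no chip in the hierarchy implements
	 * .irq_force_complete_move, and compiles away entirely under
	 * !CONFIG_GENERIC_PENDING_IRQ via the internals.h stub above.
	 */
	irq_force_complete_move(desc);

	/* ... then move the interrupt to an online CPU ... */
}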