mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-03 18:20:25 +02:00 
			
		
		
		
	Herd7 transforms successful RMWs with Mb tags by inserting smp_mb() fences around them. We emulate this by considering imaginary po-edges before the RMW read and after the RMW write, and by extending the smp_mb() ordering rule, which currently applies only to the real po edges that would be found around an actually inserted smp_mb(), to these merely imagined po edges as well. Reported-by: Viktor Vafeiadis <viktor@mpi-sws.org> Suggested-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Jonas Oberhauser <jonas.oberhauser@huaweicloud.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Boqun Feng <boqun.feng@gmail.com> Tested-by: Boqun Feng <boqun.feng@gmail.com>
		
			
				
	
	
		
			226 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			226 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0+
(*
 * Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
 * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
 *                    Andrea Parri <parri.andrea@gmail.com>
 *
 * An earlier version of this file appeared in the companion webpage for
 * "Frightening small children and disconcerting grown-ups: Concurrency
 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
 * which appeared in ASPLOS 2018.
 *)

"Linux-kernel memory consistency model"

(*
 * File "lock.cat" handles locks and is experimental.
 * It can be replaced by include "cos.cat" for tests that do not use locks.
 *)

include "lock.cat"

(*******************)
(* Basic relations *)
(*******************)

(* Release Acquire *)
(* acq-po: po from an Acquire event to any po-later event in M. *)
let acq-po = [Acquire] ; po ; [M]
(* po-rel: po from any event in M to a po-later Release event. *)
let po-rel = [M] ; po ; [Release]
(*
 * po-unlock-lock-po: po to a UL event, then a po or rf step to an LKR
 * event, then po onward.  UL and LKR are not defined in this file;
 * presumably they are provided by the included lock.cat.
 *)
let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po

(* Fences *)
let R4rmb = R \ Noreturn	(* Reads for which rmb works *)
(* rmb only relates R4rmb reads placed on either side of an Rmb fence. *)
let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
(* wmb only relates writes placed on either side of a Wmb fence. *)
let wmb = [W] ; fencerel(Wmb) ; [W]
(*
 * mb: pairs of events in M that are ordered as if by smp_mb().
 * Each disjunct of the union below is one way such ordering arises.
 *)
let mb = ([M] ; fencerel(Mb) ; [M]) |
	(*
	 * full-barrier RMWs (successful cmpxchg(), xchg(), etc.) act as
	 * though they were enclosed by smp_mb().
	 * The effect of these virtual smp_mb() is formalized by adding
	 * Mb tags to the read and write of the operation, and providing
	 * the same ordering as though there were additional po edges
	 * between the Mb tag and the read resp. write.
	 *)
	([M] ; po ; [Mb & R]) |
	([Mb & W] ; po ; [M]) |
	([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
	([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
	([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
(*
 * Note: The po-unlock-lock-po relation only passes the lock to the direct
 * successor, perhaps giving the impression that the ordering of the
 * smp_mb__after_unlock_lock() fence only affects a single lock handover.
 * However, in a longer sequence of lock handovers, the implicit
 * A-cumulative release fences of lock-release ensure that any stores that
 * propagate to one of the involved CPUs before it hands over the lock to
 * the next CPU will also propagate to the final CPU handing over the lock
 * to the CPU that executes the fence.  Therefore, all those stores are
 * also affected by the fence.
 *)
	([M] ; po-unlock-lock-po ;
		[After-unlock-lock] ; po ; [M]) |
	([M] ; po? ; [Srcu-unlock] ; fencerel(After-srcu-read-unlock) ; [M])
(* gp: po into a Sync-rcu or Sync-srcu event, optionally followed by po. *)
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
let strong-fence = mb | gp

(* nonrw-fence: the components of fence other than wmb and rmb. *)
let nonrw-fence = strong-fence | po-rel | acq-po
let fence = nonrw-fence | wmb | rmb
(*
 * barrier: po pairs separated by any of the listed fence-like events,
 * plus po into a Release event and po out of an Acquire event.
 *)
let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
		Before-atomic | After-atomic | Acquire | Release |
		Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
	(po ; [Release]) | ([Acquire] ; po)

(**********************************)
(* Fundamental coherence ordering *)
(**********************************)

(* Sequential Consistency Per Variable *)
let com = rf | co | fr
(* Check "coherence": the union of po-loc and com must have no cycle. *)
acyclic po-loc | com as coherence

(* Atomic Read-Modify-Write *)
(* Check "atomic": no rmw pair may also be linked by fre ; coe. *)
empty rmw & (fre ; coe) as atomic

(**********************************)
(* Instruction execution ordering *)
(**********************************)

(* Preserved Program Order *)
let dep = addr | data
(* rwdep: address, data or control dependency ending at a write. *)
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
(* to-w and to-r collect the ordering sources used by ppo below. *)
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
let to-r = (addr ; [R]) | (dep ; [Marked] ; rfi)
(* ppo keeps only the "int" part of fence and po-unlock-lock-po. *)
let ppo = to-r | to-w | (fence & int) | (po-unlock-lock-po & int)

(* Propagation: Ordering from release operations and strong fences. *)
(* A-cumul(r): r, optionally preceded by an rfe step into a Marked event. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
(* rmw-sequence: zero or more consecutive (rf ; rmw) steps. *)
let rmw-sequence = (rf ; rmw)*
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
	po-unlock-lock-po) ; [Marked] ; rmw-sequence
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
	[Marked] ; rfe? ; [Marked]

(*
 * Happens Before: Ordering from the passage of time.
 * No fences needed here for prop because relation confined to one process.
 *)
let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
(* Check "happens-before": hb must have no cycle. *)
acyclic hb as happens-before

(****************************************)
(* Write and fence propagation ordering *)
(****************************************)

(* Propagation: Each non-rf link needs a strong fence. *)
let pb = prop ; strong-fence ; hb* ; [Marked]
(* Check "propagation": pb must have no cycle. *)
acyclic pb as propagation

(*******)
(* RCU *)
(*******)

(*
 * Effects of read-side critical sections proceed from the rcu_read_unlock()
 * or srcu_read_unlock() backwards on the one hand, and from the
 * rcu_read_lock() or srcu_read_lock() forwards on the other hand.
 *
 * In the definition of rcu-fence below, the po term at the left-hand side
 * of each disjunct and the po? term at the right-hand end have been factored
 * out.  They have been moved into the definitions of rcu-link and rb.
 * This was necessary in order to apply the "& loc" tests correctly.
 *)
let rcu-gp = [Sync-rcu]		(* Compare with gp *)
let srcu-gp = [Sync-srcu]
(*
 * rcu-rscsi and srcu-rscsi are the inverses of rcu-rscs and srcu-rscs,
 * which are not defined in this file.
 *)
let rcu-rscsi = rcu-rscs^-1
let srcu-rscsi = srcu-rscs^-1

(*
 * The synchronize_rcu() strong fence is special in that it can order not
 * one but two non-rf relations, but only in conjunction with an RCU
 * read-side critical section.
 *)
let rcu-link = po? ; hb* ; pb* ; prop ; po

(*
 * Any sequence containing at least as many grace periods as RCU read-side
 * critical sections (joined by rcu-link) induces order like a generalized
 * inter-CPU strong fence.
 * Likewise for SRCU grace periods and read-side critical sections, provided
 * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
 * struct srcu_struct location.
 *
 * The definition is recursive: rcu-order appears inside its own disjuncts.
 *)
let rec rcu-order = rcu-gp | srcu-gp |
	(rcu-gp ; rcu-link ; rcu-rscsi) |
	((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
	(rcu-rscsi ; rcu-link ; rcu-gp) |
	((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
	(rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
	((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
	(rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
	((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
	(rcu-order ; rcu-link ; rcu-order)
let rcu-fence = po ; rcu-order ; po?
(*
 * Rebind fence and strong-fence so that they also include rcu-fence.
 * Definitions that appear before this point were built from the earlier
 * bindings; definitions after it use the extended ones.
 *)
let fence = fence | rcu-fence
let strong-fence = strong-fence | rcu-fence

(* rb orders instructions just as pb does *)
let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]

(* Check "rcu": no event may be rb-related to itself. *)
irreflexive rb as rcu

(*
 * The happens-before, propagation, and rcu constraints are all
 * expressions of temporal ordering.  They could be replaced by
 * a single constraint on an "executes-before" relation, xb:
 *
 * let xb = hb | pb | rb
 * acyclic xb as executes-before
 *)

(*********************************)
(* Plain accesses and data races *)
(*********************************)

(* Warn about plain writes and marked accesses in the same region *)
(* "Same region" here means po-loc pairs that are not in barrier. *)
let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
	([Marked] ; (po-loc \ barrier) ; [Plain & W])
flag ~empty mixed-accesses as mixed-accesses

(* Executes-before and visibility *)
(* xbstar: reflexive-transitive closure of hb | pb | rb. *)
let xbstar = (hb | pb | rb)*
let vis = cumul-fence* ; rfe? ; [Marked] ;
	((strong-fence ; [Marked] ; xbstar) | (xbstar & int))

(* Boundaries for lifetimes of plain accesses *)
(* Each relation links a Marked event to the access it bounds, or back. *)
let w-pre-bounded = [Marked] ; (addr | fence)?
let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
	([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
let w-post-bounded = fence? ; [Marked] ; rmw-sequence
let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
	[Marked]

(* Visibility and executes-before for plain accesses *)
let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
	(w-post-bounded ; vis ; w-pre-bounded)
let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
	(w-post-bounded ; vis ; r-pre-bounded)
let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)

(* Potential races *)
(* ext pairs in which at least one side is Plain; IW is excluded on the left. *)
let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))

(* Coherence requirements for plain accesses *)
let wr-incoh = pre-race & rf & rw-xbstar^-1
let rw-incoh = pre-race & fr & wr-vis^-1
let ww-incoh = pre-race & co & ww-vis^-1
(* Check "plain-coherence": none of the three incoherence relations may hold. *)
empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence

(* Actual races *)
let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
let ww-race = (pre-race & co) \ ww-nonrace
let wr-race = (pre-race & (co? ; rf)) \ wr-vis \ rw-xbstar^-1
let rw-race = (pre-race & fr) \ rw-xbstar

(* Flag "data-race" whenever any of the three race relations is non-empty. *)
flag ~empty (ww-race | wr-race | rw-race) as data-race