mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	dm: add clone target
Add the dm-clone target, which allows cloning of arbitrary block devices. dm-clone produces a one-to-one copy of an existing, read-only source device into a writable destination device: It presents a virtual block device which makes all data appear immediately, and redirects reads and writes accordingly. The main use case of dm-clone is to clone a potentially remote, high-latency, read-only, archival-type block device into a writable, fast, primary-type device for fast, low-latency I/O. The cloned device is visible/mountable immediately and the copy of the source device to the destination device happens in the background, in parallel with user I/O. When the cloning completes, the dm-clone table can be removed altogether and be replaced, e.g., by a linear table, mapping directly to the destination device. For further information and examples of how to use dm-clone, please read Documentation/admin-guide/device-mapper/dm-clone.rst Suggested-by: Vangelis Koukis <vkoukis@arrikto.com> Co-developed-by: Ilias Tsitsimpis <iliastsi@arrikto.com> Signed-off-by: Ilias Tsitsimpis <iliastsi@arrikto.com> Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
		
							parent
							
								
									c8156fc77d
								
							
						
					
					
						commit
						7431b7835f
					
				
					 6 changed files with 3662 additions and 0 deletions
				
			
		
							
								
								
									
										333
									
								
								Documentation/admin-guide/device-mapper/dm-clone.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										333
									
								
								Documentation/admin-guide/device-mapper/dm-clone.rst
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,333 @@
 | 
				
			||||||
 | 
					.. SPDX-License-Identifier: GPL-2.0-only
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					========
 | 
				
			||||||
 | 
					dm-clone
 | 
				
			||||||
 | 
					========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Introduction
 | 
				
			||||||
 | 
					============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dm-clone is a device mapper target which produces a one-to-one copy of an
 | 
				
			||||||
 | 
					existing, read-only source device into a writable destination device: It
 | 
				
			||||||
 | 
					presents a virtual block device which makes all data appear immediately, and
 | 
				
			||||||
 | 
					redirects reads and writes accordingly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The main use case of dm-clone is to clone a potentially remote, high-latency,
 | 
				
			||||||
 | 
					read-only, archival-type block device into a writable, fast, primary-type device
 | 
				
			||||||
 | 
					for fast, low-latency I/O. The cloned device is visible/mountable immediately
 | 
				
			||||||
 | 
					and the copy of the source device to the destination device happens in the
 | 
				
			||||||
 | 
					background, in parallel with user I/O.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For example, one could restore an application backup from a read-only copy,
 | 
				
			||||||
 | 
					accessible through a network storage protocol (NBD, Fibre Channel, iSCSI, AoE,
 | 
				
			||||||
 | 
					etc.), into a local SSD or NVMe device, and start using the device immediately,
 | 
				
			||||||
 | 
					without waiting for the restore to complete.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When the cloning completes, the dm-clone table can be removed altogether and be
 | 
				
			||||||
 | 
					replaced, e.g., by a linear table, mapping directly to the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The dm-clone target reuses the metadata library used by the thin-provisioning
 | 
				
			||||||
 | 
					target.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Glossary
 | 
				
			||||||
 | 
					========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Hydration
 | 
				
			||||||
 | 
					     The process of filling a region of the destination device with data from
 | 
				
			||||||
 | 
					     the same region of the source device, i.e., copying the region from the
 | 
				
			||||||
 | 
					     source to the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Once a region gets hydrated we redirect all I/O regarding it to the destination
 | 
				
			||||||
 | 
					device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Design
 | 
				
			||||||
 | 
					======
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Sub-devices
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The target is constructed by passing three devices to it (along with other
 | 
				
			||||||
 | 
					parameters detailed later):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. A source device - the read-only device that gets cloned and source of the
 | 
				
			||||||
 | 
					   hydration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. A destination device - the destination of the hydration, which will become a
 | 
				
			||||||
 | 
					   clone of the source device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. A small metadata device - it records which regions are already valid in the
 | 
				
			||||||
 | 
					   destination device, i.e., which regions have already been hydrated, or have
 | 
				
			||||||
 | 
					   been written to directly, via user I/O.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The size of the destination device must be at least equal to the size of the
 | 
				
			||||||
 | 
					source device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Regions
 | 
				
			||||||
 | 
					-------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dm-clone divides the source and destination devices into fixed-size regions.
 | 
				
			||||||
 | 
					Regions are the unit of hydration, i.e., the minimum amount of data copied from
 | 
				
			||||||
 | 
					the source to the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The region size is configurable when you first create the dm-clone device. The
 | 
				
			||||||
 | 
					recommended region size is the same as the file system block size, which usually
 | 
				
			||||||
 | 
					is 4KB. The region size must be between 8 sectors (4KB) and 2097152 sectors
 | 
				
			||||||
 | 
					(1GB) and a power of two.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Reads and writes from/to hydrated regions are serviced from the destination
 | 
				
			||||||
 | 
					device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A read to a not yet hydrated region is serviced directly from the source device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A write to a not yet hydrated region will be delayed until the corresponding
 | 
				
			||||||
 | 
					region has been hydrated; the hydration of that region starts immediately.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note that a write request with size equal to region size will skip copying of
 | 
				
			||||||
 | 
					the corresponding region from the source device and overwrite the region of the
 | 
				
			||||||
 | 
					destination device directly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Discards
 | 
				
			||||||
 | 
					--------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dm-clone interprets a discard request to a range that hasn't been hydrated yet
 | 
				
			||||||
 | 
					as a hint to skip hydration of the regions covered by the request, i.e., it
 | 
				
			||||||
 | 
					skips copying the region's data from the source to the destination device, and
 | 
				
			||||||
 | 
					only updates its metadata.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the destination device supports discards, then by default dm-clone will pass
 | 
				
			||||||
 | 
					down discard requests to it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Background Hydration
 | 
				
			||||||
 | 
					--------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dm-clone copies continuously from the source to the destination device, until
 | 
				
			||||||
 | 
					all of the device has been copied.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copying data from the source to the destination device uses bandwidth. The user
 | 
				
			||||||
 | 
					can set a throttle to prevent more than a certain amount of copying occurring at
 | 
				
			||||||
 | 
					any one time. Moreover, dm-clone takes into account user I/O traffic going to
 | 
				
			||||||
 | 
					the devices and pauses the background hydration when there is I/O in-flight.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A message `hydration_threshold <#regions>` can be used to set the maximum number
 | 
				
			||||||
 | 
					of regions being copied, the default being 1 region.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dm-clone employs dm-kcopyd for copying portions of the source device to the
 | 
				
			||||||
 | 
					destination device. By default, we issue copy requests of size equal to the
 | 
				
			||||||
 | 
					region size. A message `hydration_batch_size <#regions>` can be used to tune the
 | 
				
			||||||
 | 
					size of these copy requests. Increasing the hydration batch size results in
 | 
				
			||||||
 | 
					dm-clone trying to batch together contiguous regions, so we copy the data in
 | 
				
			||||||
 | 
					batches of this many regions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When the hydration of the destination device finishes, a dm event will be sent
 | 
				
			||||||
 | 
					to user space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Updating on-disk metadata
 | 
				
			||||||
 | 
					-------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					On-disk metadata is committed every time a FLUSH or FUA bio is written. If no
 | 
				
			||||||
 | 
					such requests are made then commits will occur every second. This means the
 | 
				
			||||||
 | 
					dm-clone device behaves like a physical disk that has a volatile write cache. If
 | 
				
			||||||
 | 
					power is lost you may lose some recent writes. The metadata should always be
 | 
				
			||||||
 | 
					consistent in spite of any crash.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Target Interface
 | 
				
			||||||
 | 
					================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Constructor
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   clone <metadata dev> <destination dev> <source dev> <region size>
 | 
				
			||||||
 | 
					         [<#feature args> [<feature arg>]* [<#core args> [<core arg>]*]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 ================ ==============================================================
 | 
				
			||||||
 | 
					 metadata dev     Fast device holding the persistent metadata
 | 
				
			||||||
 | 
					 destination dev  The destination device, where the source will be cloned
 | 
				
			||||||
 | 
					 source dev       Read only device containing the data that gets cloned
 | 
				
			||||||
 | 
					 region size      The size of a region in sectors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 #feature args    Number of feature arguments passed
 | 
				
			||||||
 | 
					 feature args     no_hydration or no_discard_passdown
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 #core args       An even number of arguments corresponding to key/value pairs
 | 
				
			||||||
 | 
					                  passed to dm-clone
 | 
				
			||||||
 | 
					 core args        Key/value pairs passed to dm-clone, e.g. `hydration_threshold
 | 
				
			||||||
 | 
					                  256`
 | 
				
			||||||
 | 
					 ================ ==============================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Optional feature arguments are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 ==================== =========================================================
 | 
				
			||||||
 | 
					 no_hydration         Create a dm-clone instance with background hydration
 | 
				
			||||||
 | 
					                      disabled
 | 
				
			||||||
 | 
					 no_discard_passdown  Disable passing down discards to the destination device
 | 
				
			||||||
 | 
					 ==================== =========================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Optional core arguments are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 ================================ ==============================================
 | 
				
			||||||
 | 
					 hydration_threshold <#regions>   Maximum number of regions being copied from
 | 
				
			||||||
 | 
					                                  the source to the destination device at any
 | 
				
			||||||
 | 
					                                  one time, during background hydration.
 | 
				
			||||||
 | 
					 hydration_batch_size <#regions>  During background hydration, try to batch
 | 
				
			||||||
 | 
					                                  together contiguous regions, so we copy data
 | 
				
			||||||
 | 
					                                  from the source to the destination device in
 | 
				
			||||||
 | 
					                                  batches of this many regions.
 | 
				
			||||||
 | 
					 ================================ ==============================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Status
 | 
				
			||||||
 | 
					------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 | 
				
			||||||
 | 
					   <region size> <#hydrated regions>/<#total regions> <#hydrating regions>
 | 
				
			||||||
 | 
					   <#feature args> <feature args>* <#core args> <core args>*
 | 
				
			||||||
 | 
					   <clone metadata mode>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 ======================= =======================================================
 | 
				
			||||||
 | 
					 metadata block size     Fixed block size for each metadata block in sectors
 | 
				
			||||||
 | 
					 #used metadata blocks   Number of metadata blocks used
 | 
				
			||||||
 | 
					 #total metadata blocks  Total number of metadata blocks
 | 
				
			||||||
 | 
					 region size             Configurable region size for the device in sectors
 | 
				
			||||||
 | 
					 #hydrated regions       Number of regions that have finished hydrating
 | 
				
			||||||
 | 
					 #total regions          Total number of regions to hydrate
 | 
				
			||||||
 | 
					 #hydrating regions      Number of regions currently hydrating
 | 
				
			||||||
 | 
					 #feature args           Number of feature arguments to follow
 | 
				
			||||||
 | 
					 feature args            Feature arguments, e.g. `no_hydration`
 | 
				
			||||||
 | 
					 #core args              Even number of core arguments to follow
 | 
				
			||||||
 | 
					 core args               Key/value pairs for tuning the core, e.g.
 | 
				
			||||||
 | 
					                         `hydration_threshold 256`
 | 
				
			||||||
 | 
					 clone metadata mode     ro if read-only, rw if read-write
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                         In serious cases where even a read-only mode is deemed
 | 
				
			||||||
 | 
					                         unsafe no further I/O will be permitted and the status
 | 
				
			||||||
 | 
					                         will just contain the string 'Fail'. If the metadata
 | 
				
			||||||
 | 
					                         mode changes, a dm event will be sent to user space.
 | 
				
			||||||
 | 
					 ======================= =======================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Messages
 | 
				
			||||||
 | 
					--------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  `disable_hydration`
 | 
				
			||||||
 | 
					      Disable the background hydration of the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  `enable_hydration`
 | 
				
			||||||
 | 
					      Enable the background hydration of the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  `hydration_threshold <#regions>`
 | 
				
			||||||
 | 
					      Set background hydration threshold.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  `hydration_batch_size <#regions>`
 | 
				
			||||||
 | 
					      Set background hydration batch size.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Examples
 | 
				
			||||||
 | 
					========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Clone a device containing a file system
 | 
				
			||||||
 | 
					---------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Create the dm-clone device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dmsetup create clone --table "0 1048576000 clone $metadata_dev $dest_dev \
 | 
				
			||||||
 | 
					      $source_dev 8 1 no_hydration"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. Mount the device and trim the file system. dm-clone interprets the discards
 | 
				
			||||||
 | 
					   sent by the file system and it will not hydrate the unused space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mount /dev/mapper/clone /mnt/cloned-fs
 | 
				
			||||||
 | 
					    fstrim /mnt/cloned-fs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. Enable background hydration of the destination device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dmsetup message clone 0 enable_hydration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					4. When the hydration finishes, we can replace the dm-clone table with a linear
 | 
				
			||||||
 | 
					   table.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dmsetup suspend clone
 | 
				
			||||||
 | 
					    dmsetup load clone --table "0 1048576000 linear $dest_dev 0"
 | 
				
			||||||
 | 
					    dmsetup resume clone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   The metadata device is no longer needed and can be safely discarded or reused
 | 
				
			||||||
 | 
					   for other purposes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Known issues
 | 
				
			||||||
 | 
					============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. We redirect reads, to not-yet-hydrated regions, to the source device. If
 | 
				
			||||||
 | 
					   reading the source device has high latency and the user repeatedly reads from
 | 
				
			||||||
 | 
					   the same regions, this behaviour could degrade performance. We should use
 | 
				
			||||||
 | 
					   these reads as hints to hydrate the relevant regions sooner. Currently, we
 | 
				
			||||||
 | 
					   rely on the page cache to cache these regions, so we hopefully don't end up
 | 
				
			||||||
 | 
					   reading them multiple times from the source device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. Release in-core resources, i.e., the bitmaps tracking which regions are
 | 
				
			||||||
 | 
					   hydrated, after the hydration has finished.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. During background hydration, if we fail to read the source or write to the
 | 
				
			||||||
 | 
					   destination device, we print an error message, but the hydration process
 | 
				
			||||||
 | 
					   continues indefinitely, until it succeeds. We should stop the background
 | 
				
			||||||
 | 
					   hydration after a number of failures and emit a dm event for user space to
 | 
				
			||||||
 | 
					   notice.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Why not...?
 | 
				
			||||||
 | 
					===========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We explored the following alternatives before implementing dm-clone:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Use dm-cache with cache size equal to the source device and implement a new
 | 
				
			||||||
 | 
					   cloning policy:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * The resulting cache device is not a one-to-one mirror of the source device
 | 
				
			||||||
 | 
					     and thus we cannot remove the cache device once cloning completes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * dm-cache writes to the source device, which violates our requirement that
 | 
				
			||||||
 | 
					     the source device must be treated as read-only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * Caching is semantically different from cloning.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. Use dm-snapshot with a COW device equal to the source device:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * dm-snapshot stores its metadata in the COW device, so the resulting device
 | 
				
			||||||
 | 
					     is not a one-to-one mirror of the source device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * No background copying mechanism.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * dm-snapshot needs to commit its metadata whenever a pending exception
 | 
				
			||||||
 | 
					     completes, to ensure snapshot consistency. In the case of cloning, we don't
 | 
				
			||||||
 | 
					     need to be so strict and can rely on committing metadata every time a FLUSH
 | 
				
			||||||
 | 
					     or FUA bio is written, or periodically, like dm-thin and dm-cache do. This
 | 
				
			||||||
 | 
					     improves the performance significantly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. Use dm-mirror: The mirror target has a background copying/mirroring
 | 
				
			||||||
 | 
					   mechanism, but it writes to all mirrors, thus violating our requirement that
 | 
				
			||||||
 | 
					   the source device must be treated as read-only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					4. Use dm-thin's external snapshot functionality. This approach is the most
 | 
				
			||||||
 | 
					   promising among all alternatives, as the thinly-provisioned volume is a
 | 
				
			||||||
 | 
					   one-to-one mirror of the source device and handles reads and writes to
 | 
				
			||||||
 | 
					   un-provisioned/not-yet-cloned areas the same way as dm-clone does.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Still:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * There is no background copying mechanism, though one could be implemented.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   * Most importantly, we want to support arbitrary block devices as the
 | 
				
			||||||
 | 
					     destination of the cloning process and not restrict ourselves to
 | 
				
			||||||
 | 
					     thinly-provisioned volumes. Thin-provisioning has an inherent metadata
 | 
				
			||||||
 | 
					     overhead, for maintaining the thin volume mappings, which significantly
 | 
				
			||||||
 | 
					     degrades performance.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Moreover, cloning a device shouldn't force the use of thin-provisioning. On
 | 
				
			||||||
 | 
					   the other hand, if we wish to use thin provisioning, we can just use a thin
 | 
				
			||||||
 | 
					   LV as dm-clone's destination device.
 | 
				
			||||||
| 
						 | 
					@ -347,6 +347,20 @@ config DM_ERA
 | 
				
			||||||
         over time.  Useful for maintaining cache coherency when using
 | 
					         over time.  Useful for maintaining cache coherency when using
 | 
				
			||||||
         vendor snapshots.
 | 
					         vendor snapshots.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config DM_CLONE
 | 
				
			||||||
 | 
					       tristate "Clone target (EXPERIMENTAL)"
 | 
				
			||||||
 | 
					       depends on BLK_DEV_DM
 | 
				
			||||||
 | 
					       default n
 | 
				
			||||||
 | 
					       select DM_PERSISTENT_DATA
 | 
				
			||||||
 | 
					       ---help---
 | 
				
			||||||
 | 
					         dm-clone produces a one-to-one copy of an existing, read-only source
 | 
				
			||||||
 | 
					         device into a writable destination device. The cloned device is
 | 
				
			||||||
 | 
					         visible/mountable immediately and the copy of the source device to the
 | 
				
			||||||
 | 
					         destination device happens in the background, in parallel with user
 | 
				
			||||||
 | 
					         I/O.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					         If unsure, say N.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config DM_MIRROR
 | 
					config DM_MIRROR
 | 
				
			||||||
       tristate "Mirror target"
 | 
					       tristate "Mirror target"
 | 
				
			||||||
       depends on BLK_DEV_DM
 | 
					       depends on BLK_DEV_DM
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +18,7 @@ dm-cache-y	+= dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
 | 
				
			||||||
		    dm-cache-background-tracker.o
 | 
							    dm-cache-background-tracker.o
 | 
				
			||||||
dm-cache-smq-y   += dm-cache-policy-smq.o
 | 
					dm-cache-smq-y   += dm-cache-policy-smq.o
 | 
				
			||||||
dm-era-y	+= dm-era-target.o
 | 
					dm-era-y	+= dm-era-target.o
 | 
				
			||||||
 | 
					dm-clone-y	+= dm-clone-target.o dm-clone-metadata.o
 | 
				
			||||||
dm-verity-y	+= dm-verity-target.o
 | 
					dm-verity-y	+= dm-verity-target.o
 | 
				
			||||||
md-mod-y	+= md.o md-bitmap.o
 | 
					md-mod-y	+= md.o md-bitmap.o
 | 
				
			||||||
raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
 | 
					raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
 | 
				
			||||||
| 
						 | 
					@ -65,6 +66,7 @@ obj-$(CONFIG_DM_VERITY)		+= dm-verity.o
 | 
				
			||||||
obj-$(CONFIG_DM_CACHE)		+= dm-cache.o
 | 
					obj-$(CONFIG_DM_CACHE)		+= dm-cache.o
 | 
				
			||||||
obj-$(CONFIG_DM_CACHE_SMQ)	+= dm-cache-smq.o
 | 
					obj-$(CONFIG_DM_CACHE_SMQ)	+= dm-cache-smq.o
 | 
				
			||||||
obj-$(CONFIG_DM_ERA)		+= dm-era.o
 | 
					obj-$(CONFIG_DM_ERA)		+= dm-era.o
 | 
				
			||||||
 | 
					obj-$(CONFIG_DM_CLONE)		+= dm-clone.o
 | 
				
			||||||
obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 | 
					obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 | 
				
			||||||
obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 | 
					obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 | 
				
			||||||
obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
 | 
					obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										964
									
								
								drivers/md/dm-clone-metadata.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										964
									
								
								drivers/md/dm-clone-metadata.c
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,964 @@
 | 
				
			||||||
 | 
					// SPDX-License-Identifier: GPL-2.0-only
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/mm.h>
 | 
				
			||||||
 | 
					#include <linux/err.h>
 | 
				
			||||||
 | 
					#include <linux/slab.h>
 | 
				
			||||||
 | 
					#include <linux/rwsem.h>
 | 
				
			||||||
 | 
					#include <linux/bitops.h>
 | 
				
			||||||
 | 
					#include <linux/bitmap.h>
 | 
				
			||||||
 | 
					#include <linux/device-mapper.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "persistent-data/dm-bitset.h"
 | 
				
			||||||
 | 
					#include "persistent-data/dm-space-map.h"
 | 
				
			||||||
 | 
					#include "persistent-data/dm-block-manager.h"
 | 
				
			||||||
 | 
					#include "persistent-data/dm-transaction-manager.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "dm-clone-metadata.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DM_MSG_PREFIX "clone metadata" /* presumably the prefix picked up by the DM logging macros - confirm */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SUPERBLOCK_LOCATION 0 /* presumably the metadata block number holding the superblock - confirm */
 | 
				
			||||||
 | 
					#define SUPERBLOCK_MAGIC 0x8af27f64 /* presumably the expected value of superblock_disk.magic - confirm */
 | 
				
			||||||
 | 
					#define SUPERBLOCK_CSUM_XOR 257649492 /* NOTE(review): looks like a salt for superblock_disk.csum; its use is not visible in this chunk */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DM_CLONE_MAX_CONCURRENT_LOCKS 5 /* NOTE(review): consumer not visible in this chunk - confirm what is being locked */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define UUID_LEN 16 /* size in bytes of the uuid[] array in struct superblock_disk */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Min and max dm-clone metadata versions supported */
 | 
				
			||||||
 | 
					#define DM_CLONE_MIN_METADATA_VERSION 1
 | 
				
			||||||
 | 
					#define DM_CLONE_MAX_METADATA_VERSION 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * On-disk metadata layout
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct superblock_disk { /* on-disk superblock; multi-byte integer fields use the little-endian __le32/__le64 types */
 | 
				
			||||||
 | 
						__le32 csum; /* presumably a checksum over the superblock (see SUPERBLOCK_CSUM_XOR) - confirm */
 | 
				
			||||||
 | 
						__le32 flags; /* NOTE(review): meaning of the flag bits is not visible in this chunk */
 | 
				
			||||||
 | 
						__le64 blocknr; /* presumably the block number this superblock is stored at - confirm */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__u8 uuid[UUID_LEN]; /* UUID_LEN (16) raw bytes */
 | 
				
			||||||
 | 
						__le64 magic; /* presumably checked against SUPERBLOCK_MAGIC - confirm */
 | 
				
			||||||
 | 
						__le32 version; /* presumably bounded by DM_CLONE_{MIN,MAX}_METADATA_VERSION - confirm */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; /* opaque byte blob; SPACE_MAP_ROOT_SIZE is defined outside this chunk */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__le64 region_size; /* region size; the target documentation gives it in sectors - TODO confirm the unit stored here */
 | 
				
			||||||
 | 
						__le64 target_size; /* NOTE(review): unit (sectors vs. regions) is not visible in this chunk */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__le64 bitset_root; /* presumably the root of the on-disk bitset tracking hydrated regions - confirm */
 | 
				
			||||||
 | 
					} __packed; /* packed: no compiler-inserted padding between fields */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Region and Dirty bitmaps.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * dm-clone logically splits the source and destination devices into regions of
 | 
				
			||||||
 | 
					 * fixed size. The destination device's regions are gradually hydrated, i.e.,
 | 
				
			||||||
 | 
					 * we copy (clone) the source's regions to the destination device. Eventually,
 | 
				
			||||||
 | 
					 * all regions will get hydrated and all I/O will be served from the
 | 
				
			||||||
 | 
					 * destination device.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We maintain an on-disk bitmap which tracks the state of each of the
 | 
				
			||||||
 | 
					 * destination device's regions, i.e., whether they are hydrated or not.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * To save constantly doing look ups on disk we keep an in core copy of the
 | 
				
			||||||
 | 
					 * on-disk bitmap, the region_map.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * To further reduce metadata I/O overhead we use a second bitmap, the dmap
 | 
				
			||||||
 | 
					 * (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * When a region finishes hydrating dm-clone calls
 | 
				
			||||||
 | 
					 * dm_clone_set_region_hydrated(), or for discard requests
 | 
				
			||||||
 | 
					 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 | 
				
			||||||
 | 
					 * and dmap.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * During a metadata commit we scan the dmap for dirty region_map words (longs)
 | 
				
			||||||
 | 
					 * and update accordingly the on-disk metadata. Thus, we don't have to flush to
 | 
				
			||||||
 | 
					 * disk the whole region_map. We can just flush the dirty region_map words.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We use a dirty bitmap, which is smaller than the original region_map, to
 | 
				
			||||||
 | 
					 * reduce the amount of memory accesses during a metadata commit. As dm-bitset
 | 
				
			||||||
 | 
					 * accesses the on-disk bitmap in 64-bit word granularity, there is no
 | 
				
			||||||
 | 
					 * significant benefit in tracking the dirty region_map bits with a smaller
 | 
				
			||||||
 | 
					 * granularity.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We could update directly the on-disk bitmap, when dm-clone calls either
 | 
				
			||||||
 | 
					 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 | 
				
			||||||
 | 
					 * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
 | 
				
			||||||
 | 
					 * these two functions don't block, we can call them in interrupt context,
 | 
				
			||||||
 | 
					 * e.g., in a hooked overwrite bio's completion routine, and further reduce the
 | 
				
			||||||
 | 
					 * I/O completion latency.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We maintain two dirty bitmaps. During a metadata commit we atomically swap
 | 
				
			||||||
 | 
					 * the currently used dmap with the unused one. This allows the metadata update
 | 
				
			||||||
 | 
					 * functions to run concurrently with an ongoing commit.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct dirty_map {
						/* Bitmap holding one bit for every word (long) of the region_map */
						unsigned long *dirty_words;

						/*
						 * NOTE(review): presumably non-zero once at least one word has been
						 * marked dirty -- confirm against the code that updates this field.
						 */
						unsigned int changed;
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct dm_clone_metadata {
 | 
				
			||||||
 | 
						/* The metadata block device */
 | 
				
			||||||
 | 
						struct block_device *bdev;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sector_t target_size;
 | 
				
			||||||
 | 
						sector_t region_size;
 | 
				
			||||||
 | 
						unsigned long nr_regions;
 | 
				
			||||||
 | 
						unsigned long nr_words;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Spinlock protecting the region and dirty bitmaps. */
 | 
				
			||||||
 | 
						spinlock_t bitmap_lock;
 | 
				
			||||||
 | 
						struct dirty_map dmap[2];
 | 
				
			||||||
 | 
						struct dirty_map *current_dmap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * In core copy of the on-disk bitmap to save constantly doing look ups
 | 
				
			||||||
 | 
						 * on disk.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						unsigned long *region_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Protected by bitmap_lock */
 | 
				
			||||||
 | 
						unsigned int read_only;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct dm_block_manager *bm;
 | 
				
			||||||
 | 
						struct dm_space_map *sm;
 | 
				
			||||||
 | 
						struct dm_transaction_manager *tm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct rw_semaphore lock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct dm_disk_bitset bitset_info;
 | 
				
			||||||
 | 
						dm_block_t bitset_root;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Reading the space map root can fail, so we read it into this
 | 
				
			||||||
 | 
						 * buffer before the superblock is locked and updated.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bool hydration_done:1;
 | 
				
			||||||
 | 
						bool fail_io:1;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*---------------------------------------------------------------------------*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Superblock validation.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					/*
					 * Validator hook run before the superblock is written: record the block's own
					 * location in it and then checksum everything that follows the csum field.
					 */
					static void sb_prepare_for_write(struct dm_block_validator *v,
									 struct dm_block *b, size_t sb_block_size)
					{
						struct superblock_disk *sb = dm_block_data(b);

						sb->blocknr = cpu_to_le64(dm_block_location(b));

						/* The checksum must be computed last, it covers blocknr too */
						sb->csum = cpu_to_le32(dm_bm_checksum(&sb->flags,
										      sb_block_size - sizeof(__le32),
										      SUPERBLOCK_CSUM_XOR));
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int sb_check(struct dm_block_validator *v, struct dm_block *b,
 | 
				
			||||||
 | 
							    size_t sb_block_size)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct superblock_disk *sb;
 | 
				
			||||||
 | 
						u32 csum, metadata_version;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb = dm_block_data(b);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
 | 
				
			||||||
 | 
							DMERR("Superblock check failed: blocknr %llu, expected %llu",
 | 
				
			||||||
 | 
							      le64_to_cpu(sb->blocknr),
 | 
				
			||||||
 | 
							      (unsigned long long)dm_block_location(b));
 | 
				
			||||||
 | 
							return -ENOTBLK;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
 | 
				
			||||||
 | 
							DMERR("Superblock check failed: magic %llu, expected %llu",
 | 
				
			||||||
 | 
							      le64_to_cpu(sb->magic),
 | 
				
			||||||
 | 
							      (unsigned long long)SUPERBLOCK_MAGIC);
 | 
				
			||||||
 | 
							return -EILSEQ;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
 | 
				
			||||||
 | 
								      SUPERBLOCK_CSUM_XOR);
 | 
				
			||||||
 | 
						if (sb->csum != cpu_to_le32(csum)) {
 | 
				
			||||||
 | 
							DMERR("Superblock check failed: checksum %u, expected %u",
 | 
				
			||||||
 | 
							      csum, le32_to_cpu(sb->csum));
 | 
				
			||||||
 | 
							return -EILSEQ;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Check metadata version */
 | 
				
			||||||
 | 
						metadata_version = le32_to_cpu(sb->version);
 | 
				
			||||||
 | 
						if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
 | 
				
			||||||
 | 
						    metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
 | 
				
			||||||
 | 
							DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
 | 
				
			||||||
 | 
							      metadata_version, DM_CLONE_MIN_METADATA_VERSION,
 | 
				
			||||||
 | 
							      DM_CLONE_MAX_METADATA_VERSION);
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static struct dm_block_validator sb_validator = {
 | 
				
			||||||
 | 
						.name = "superblock",
 | 
				
			||||||
 | 
						.prepare_for_write = sb_prepare_for_write,
 | 
				
			||||||
 | 
						.check = sb_check
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Check if the superblock is formatted or not. We consider the superblock to
 | 
				
			||||||
 | 
					 * be formatted in case we find non-zero bytes in it.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						unsigned int i, nr_words;
 | 
				
			||||||
 | 
						struct dm_block *sblock;
 | 
				
			||||||
 | 
						__le64 *data_le, zero = cpu_to_le64(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We don't use a validator here because the superblock could be all
 | 
				
			||||||
 | 
						 * zeroes.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to read_lock superblock");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						data_le = dm_block_data(sblock);
 | 
				
			||||||
 | 
						*formatted = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* This assumes that the block size is a multiple of 8 bytes */
 | 
				
			||||||
 | 
						BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
 | 
				
			||||||
 | 
						nr_words = dm_bm_block_size(bm) / sizeof(__le64);
 | 
				
			||||||
 | 
						for (i = 0; i < nr_words; i++) {
 | 
				
			||||||
 | 
							if (data_le[i] != zero) {
 | 
				
			||||||
 | 
								*formatted = true;
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dm_bm_unlock(sblock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*---------------------------------------------------------------------------*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Low-level metadata handling.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
									       struct dm_block **sblock)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
										struct dm_block **sblock)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
										     struct dm_block **sblock)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __copy_sm_root(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						size_t root_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_sm_root_size(cmd->sm, &root_size);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Save dm-clone metadata in superblock */
 | 
				
			||||||
 | 
					static void __prepare_superblock(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
									 struct superblock_disk *sb)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						sb->flags = cpu_to_le32(0UL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* FIXME: UUID is currently unused */
 | 
				
			||||||
 | 
						memset(sb->uuid, 0, sizeof(sb->uuid));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
 | 
				
			||||||
 | 
						sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Save the metadata space_map root */
 | 
				
			||||||
 | 
						memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
 | 
				
			||||||
 | 
						       sizeof(cmd->metadata_space_map_root));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb->region_size = cpu_to_le64(cmd->region_size);
 | 
				
			||||||
 | 
						sb->target_size = cpu_to_le64(cmd->target_size);
 | 
				
			||||||
 | 
						sb->bitset_root = cpu_to_le64(cmd->bitset_root);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __open_metadata(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						struct dm_block *sblock;
 | 
				
			||||||
 | 
						struct superblock_disk *sb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = superblock_read_lock(cmd, &sblock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to read_lock superblock");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb = dm_block_data(sblock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Verify that target_size and region_size haven't changed. */
 | 
				
			||||||
 | 
						if (cmd->region_size != le64_to_cpu(sb->region_size) ||
 | 
				
			||||||
 | 
						    cmd->target_size != le64_to_cpu(sb->target_size)) {
 | 
				
			||||||
 | 
							DMERR("Region and/or target size don't match the ones in metadata");
 | 
				
			||||||
 | 
							r = -EINVAL;
 | 
				
			||||||
 | 
							goto out_with_lock;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
 | 
				
			||||||
 | 
								       sb->metadata_space_map_root,
 | 
				
			||||||
 | 
								       sizeof(sb->metadata_space_map_root),
 | 
				
			||||||
 | 
								       &cmd->tm, &cmd->sm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("dm_tm_open_with_sm failed");
 | 
				
			||||||
 | 
							goto out_with_lock;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
 | 
				
			||||||
 | 
						cmd->bitset_root = le64_to_cpu(sb->bitset_root);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out_with_lock:
 | 
				
			||||||
 | 
						dm_bm_unlock(sblock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __format_metadata(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						struct dm_block *sblock;
 | 
				
			||||||
 | 
						struct superblock_disk *sb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to create transaction manager");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to create empty on-disk bitset");
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
 | 
				
			||||||
 | 
								     cmd->nr_regions, false, &cmd->bitset_root);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Flush to disk all blocks, except the superblock */
 | 
				
			||||||
 | 
						r = dm_tm_pre_commit(cmd->tm);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("dm_tm_pre_commit failed");
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __copy_sm_root(cmd);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("__copy_sm_root failed");
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = superblock_write_lock_zero(cmd, &sblock);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to write_lock superblock");
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sb = dm_block_data(sblock);
 | 
				
			||||||
 | 
						__prepare_superblock(cmd, sb);
 | 
				
			||||||
 | 
						r = dm_tm_commit(cmd->tm, sblock);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to commit superblock");
 | 
				
			||||||
 | 
							goto err_with_tm;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					err_with_tm:
 | 
				
			||||||
 | 
						dm_sm_destroy(cmd->sm);
 | 
				
			||||||
 | 
						dm_tm_destroy(cmd->tm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Open the metadata if the superblock is formatted, otherwise format it.
					 *
					 * An unformatted device is only formatted when may_format_device is true;
					 * if it is false -EPERM is returned instead.
					 */
					static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
					{
						bool formatted = false;
						int r = __superblock_all_zeroes(cmd->bm, &formatted);

						if (r)
							return r;

						if (formatted)
							return __open_metadata(cmd);

						if (!may_format_device)
							return -EPERM;

						return __format_metadata(cmd);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
										       bool may_format_device)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Create block manager */
 | 
				
			||||||
 | 
						cmd->bm = dm_block_manager_create(cmd->bdev,
 | 
				
			||||||
 | 
										 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
 | 
				
			||||||
 | 
										 DM_CLONE_MAX_CONCURRENT_LOCKS);
 | 
				
			||||||
 | 
						if (IS_ERR(cmd->bm)) {
 | 
				
			||||||
 | 
							DMERR("Failed to create block manager");
 | 
				
			||||||
 | 
							return PTR_ERR(cmd->bm);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __open_or_format_metadata(cmd, may_format_device);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							dm_block_manager_destroy(cmd->bm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						dm_sm_destroy(cmd->sm);
 | 
				
			||||||
 | 
						dm_tm_destroy(cmd->tm);
 | 
				
			||||||
 | 
						dm_block_manager_destroy(cmd->bm);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*---------------------------------------------------------------------------*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Size, in bytes, of a bitmap of nr_bits bits, rounded up to whole longs */
					static size_t bitmap_size(unsigned long nr_bits)
					{
						unsigned long nr_longs = BITS_TO_LONGS(nr_bits);

						return nr_longs * sizeof(long);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int dirty_map_init(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						cmd->dmap[0].changed = 0;
 | 
				
			||||||
 | 
						cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->dmap[0].dirty_words) {
 | 
				
			||||||
 | 
							DMERR("Failed to allocate dirty bitmap");
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cmd->dmap[1].changed = 0;
 | 
				
			||||||
 | 
						cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->dmap[1].dirty_words) {
 | 
				
			||||||
 | 
							DMERR("Failed to allocate dirty bitmap");
 | 
				
			||||||
 | 
							kvfree(cmd->dmap[0].dirty_words);
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cmd->current_dmap = &cmd->dmap[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void dirty_map_exit(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						kvfree(cmd->dmap[0].dirty_words);
 | 
				
			||||||
 | 
						kvfree(cmd->dmap[1].dirty_words);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						unsigned long i;
 | 
				
			||||||
 | 
						struct dm_bitset_cursor c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Flush bitset cache */
 | 
				
			||||||
 | 
						r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; ; i++) {
 | 
				
			||||||
 | 
							if (dm_bitset_cursor_get_value(&c))
 | 
				
			||||||
 | 
								__set_bit(i, cmd->region_map);
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								__clear_bit(i, cmd->region_map);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (i >= (cmd->nr_regions - 1))
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							r = dm_bitset_cursor_next(&c);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (r)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dm_bitset_cursor_end(&c);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
 | 
				
			||||||
 | 
											 sector_t target_size,
 | 
				
			||||||
 | 
											 sector_t region_size)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						struct dm_clone_metadata *cmd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!cmd) {
 | 
				
			||||||
 | 
							DMERR("Failed to allocate memory for dm-clone metadata");
 | 
				
			||||||
 | 
							return ERR_PTR(-ENOMEM);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cmd->bdev = bdev;
 | 
				
			||||||
 | 
						cmd->target_size = target_size;
 | 
				
			||||||
 | 
						cmd->region_size = region_size;
 | 
				
			||||||
 | 
						cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
 | 
				
			||||||
 | 
						cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						init_rwsem(&cmd->lock);
 | 
				
			||||||
 | 
						spin_lock_init(&cmd->bitmap_lock);
 | 
				
			||||||
 | 
						cmd->read_only = 0;
 | 
				
			||||||
 | 
						cmd->fail_io = false;
 | 
				
			||||||
 | 
						cmd->hydration_done = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!cmd->region_map) {
 | 
				
			||||||
 | 
							DMERR("Failed to allocate memory for region bitmap");
 | 
				
			||||||
 | 
							r = -ENOMEM;
 | 
				
			||||||
 | 
							goto out_with_md;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __create_persistent_data_structures(cmd, true);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							goto out_with_region_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __load_bitset_in_core(cmd);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to load on-disk region map");
 | 
				
			||||||
 | 
							goto out_with_pds;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = dirty_map_init(cmd);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							goto out_with_pds;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (bitmap_full(cmd->region_map, cmd->nr_regions))
 | 
				
			||||||
 | 
							cmd->hydration_done = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return cmd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out_with_pds:
 | 
				
			||||||
 | 
						__destroy_persistent_data_structures(cmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out_with_region_map:
 | 
				
			||||||
 | 
						kvfree(cmd->region_map);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out_with_md:
 | 
				
			||||||
 | 
						kfree(cmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ERR_PTR(r);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (!cmd->fail_io)
 | 
				
			||||||
 | 
							__destroy_persistent_data_structures(cmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dirty_map_exit(cmd);
 | 
				
			||||||
 | 
						kvfree(cmd->region_map);
 | 
				
			||||||
 | 
						kfree(cmd);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return cmd->hydration_done;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
									unsigned long start, unsigned long nr_regions)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long bit;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (dm_clone_is_hydration_done(cmd))
 | 
				
			||||||
 | 
							return true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return (bit >= (start + nr_regions));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unsigned long dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return bitmap_weight(cmd->region_map, cmd->nr_regions);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
											   unsigned long start)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						unsigned long index = word * BITS_PER_LONG;
 | 
				
			||||||
 | 
						unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while (index < max_index) {
 | 
				
			||||||
 | 
							if (test_bit(index, cmd->region_map)) {
 | 
				
			||||||
 | 
								r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
 | 
				
			||||||
 | 
										      index, &cmd->bitset_root);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (r) {
 | 
				
			||||||
 | 
									DMERR("dm_bitset_set_bit failed");
 | 
				
			||||||
 | 
									return r;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							index++;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __metadata_commit(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						struct dm_block *sblock;
 | 
				
			||||||
 | 
						struct superblock_disk *sb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Flush bitset cache */
 | 
				
			||||||
 | 
						r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("dm_bitset_flush failed");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Flush to disk all blocks, except the superblock */
 | 
				
			||||||
 | 
						r = dm_tm_pre_commit(cmd->tm);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("dm_tm_pre_commit failed");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Save the space map root in cmd->metadata_space_map_root */
 | 
				
			||||||
 | 
						r = __copy_sm_root(cmd);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("__copy_sm_root failed");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Lock the superblock */
 | 
				
			||||||
 | 
						r = superblock_write_lock_zero(cmd, &sblock);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to write_lock superblock");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Save the metadata in superblock */
 | 
				
			||||||
 | 
						sb = dm_block_data(sblock);
 | 
				
			||||||
 | 
						__prepare_superblock(cmd, sb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Unlock superblock and commit it to disk */
 | 
				
			||||||
 | 
						r = dm_tm_commit(cmd->tm, sblock);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							DMERR("Failed to commit superblock");
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * FIXME: Find a more efficient way to check if the hydration is done.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (bitmap_full(cmd->region_map, cmd->nr_regions))
 | 
				
			||||||
 | 
							cmd->hydration_done = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
						unsigned long word, flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						word = 0;
 | 
				
			||||||
 | 
						do {
 | 
				
			||||||
 | 
							word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (word == cmd->nr_words)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							r = __update_metadata_word(cmd, word);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (r)
 | 
				
			||||||
 | 
								return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							__clear_bit(word, dmap->dirty_words);
 | 
				
			||||||
 | 
							word++;
 | 
				
			||||||
 | 
						} while (word < cmd->nr_words);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __metadata_commit(cmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Update the changed flag */
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
						dmap->changed = 0;
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = -EPERM;
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
						struct dirty_map *dmap, *next_dmap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Get current dirty bitmap */
 | 
				
			||||||
 | 
						dmap = cmd->current_dmap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Get next dirty bitmap */
 | 
				
			||||||
 | 
						next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * The last commit failed, so we don't have a clean dirty-bitmap to
 | 
				
			||||||
 | 
						 * use.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (WARN_ON(next_dmap->changed)) {
 | 
				
			||||||
 | 
							r = -EINVAL;
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Swap dirty bitmaps */
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
						cmd->current_dmap = next_dmap;
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * No one is accessing the old dirty bitmap anymore, so we can flush
 | 
				
			||||||
 | 
						 * it.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						r = __flush_dmap(cmd, dmap);
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						up_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = 0;
 | 
				
			||||||
 | 
						struct dirty_map *dmap;
 | 
				
			||||||
 | 
						unsigned long word, flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						word = region_nr / BITS_PER_LONG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cmd->read_only) {
 | 
				
			||||||
 | 
							r = -EPERM;
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dmap = cmd->current_dmap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__set_bit(word, dmap->dirty_words);
 | 
				
			||||||
 | 
						__set_bit(region_nr, cmd->region_map);
 | 
				
			||||||
 | 
						dmap->changed = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
 | 
				
			||||||
 | 
								    unsigned long nr_regions)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = 0;
 | 
				
			||||||
 | 
						struct dirty_map *dmap;
 | 
				
			||||||
 | 
						unsigned long word, region_nr, flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cmd->read_only) {
 | 
				
			||||||
 | 
							r = -EPERM;
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dmap = cmd->current_dmap;
 | 
				
			||||||
 | 
						for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
 | 
				
			||||||
 | 
							if (!test_bit(region_nr, cmd->region_map)) {
 | 
				
			||||||
 | 
								word = region_nr / BITS_PER_LONG;
 | 
				
			||||||
 | 
								__set_bit(word, dmap->dirty_words);
 | 
				
			||||||
 | 
								__set_bit(region_nr, cmd->region_map);
 | 
				
			||||||
 | 
								dmap->changed = 1;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * WARNING: This must not be called concurrently with either
 | 
				
			||||||
 | 
					 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
 | 
				
			||||||
 | 
					 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
 | 
				
			||||||
 | 
					 * exception is after setting the metadata to read-only mode, using
 | 
				
			||||||
 | 
					 * dm_clone_metadata_set_read_only().
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
 | 
				
			||||||
 | 
					 * may block.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cmd->fail_io)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __load_bitset_in_core(cmd);
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						up_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						bool r;
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
						r = cmd->dmap[0].changed || cmd->dmap[1].changed;
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = -EPERM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						__destroy_persistent_data_structures(cmd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = __create_persistent_data_structures(cmd, false);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							/* If something went wrong we can neither write nor read the metadata */
 | 
				
			||||||
 | 
							cmd->fail_io = true;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						up_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
						cmd->read_only = 1;
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->fail_io)
 | 
				
			||||||
 | 
							dm_bm_set_read_only(cmd->bm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						up_write(&cmd->lock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_write(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
						cmd->read_only = 0;
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->fail_io)
 | 
				
			||||||
 | 
							dm_bm_set_read_write(cmd->bm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						up_write(&cmd->lock);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
										   dm_block_t *result)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_read(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->fail_io)
 | 
				
			||||||
 | 
							r = dm_sm_get_nr_free(cmd->sm, result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						up_read(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
 | 
				
			||||||
 | 
									   dm_block_t *result)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r = -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						down_read(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cmd->fail_io)
 | 
				
			||||||
 | 
							r = dm_sm_get_nr_blocks(cmd->sm, result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						up_read(&cmd->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										158
									
								
								drivers/md/dm-clone-metadata.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										158
									
								
								drivers/md/dm-clone-metadata.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,158 @@
 | 
				
			||||||
 | 
					/* SPDX-License-Identifier: GPL-2.0-only */
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef DM_CLONE_METADATA_H
					#define DM_CLONE_METADATA_H

					#include "persistent-data/dm-block-manager.h"
					#include "persistent-data/dm-space-map-metadata.h"

					#define DM_CLONE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE

					/*
					 * The metadata device is currently limited in size.
					 */
					#define DM_CLONE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS

					/*
					 * A metadata device larger than 16GB triggers a warning.
					 */
					#define DM_CLONE_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT))

					#define SPACE_MAP_ROOT_SIZE 128

					/* Forward declaration of the dm-clone metadata */
					struct dm_clone_metadata;

					/*
					 * Mark a single region as hydrated.
					 *
					 * @cmd: The dm-clone metadata
					 * @region_nr: The region number
					 *
					 * This function doesn't block, so it's safe to call it from interrupt context.
					 */
					int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd,
									 unsigned long region_nr);

					/*
					 * Mark all regions in the provided range as hydrated, skipping the ones that
					 * are already hydrated.
					 *
					 * @cmd: The dm-clone metadata
					 * @start: Starting region number
					 * @nr_regions: Number of regions in the range
					 *
					 * This function doesn't block, so it's safe to call it from interrupt context.
					 */
					int dm_clone_cond_set_range(struct dm_clone_metadata *cmd,
								    unsigned long start, unsigned long nr_regions);

					/*
					 * Read existing or create fresh metadata.
					 *
					 * @bdev: The device storing the metadata
					 * @target_size: The target size
					 * @region_size: The region size
					 *
					 * @returns: The dm-clone metadata
					 *
					 * This function reads the superblock of @bdev and checks if it's all zeroes.
					 * If it is, it formats @bdev and creates fresh metadata. If it isn't, it
					 * validates the metadata stored in @bdev.
					 */
					struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
											 sector_t target_size,
											 sector_t region_size);

					/*
					 * Free the resources related to metadata management.
					 */
					void dm_clone_metadata_close(struct dm_clone_metadata *cmd);

					/*
					 * Commit dm-clone metadata to disk.
					 */
					int dm_clone_metadata_commit(struct dm_clone_metadata *cmd);

					/*
					 * Reload the in-core copy of the on-disk bitmap.
					 *
					 * This should be used after aborting a metadata transaction and setting the
					 * metadata to read-only, to invalidate the in-core cache and make it match the
					 * on-disk metadata.
					 *
					 * WARNING: It must not be called concurrently with either
					 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it updates
					 * the region bitmap without taking the relevant spinlock. We don't take the
					 * spinlock because dm_clone_reload_in_core_bitset() does I/O, so it may block.
					 *
					 * It is, however, safe to use it after calling
					 * dm_clone_metadata_set_read_only(), because the latter sets the metadata to
					 * read-only mode. Both dm_clone_set_region_hydrated() and
					 * dm_clone_cond_set_range() refuse to touch the region bitmap after
					 * dm_clone_metadata_set_read_only() has been called.
					 */
					int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd);

					/*
					 * Check whether dm-clone's metadata changed this transaction.
					 */
					bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd);

					/*
					 * Abort current metadata transaction and rollback metadata to the last
					 * committed transaction.
					 */
					int dm_clone_metadata_abort(struct dm_clone_metadata *cmd);

					/*
					 * Switch the metadata to read-only mode. Once read-only mode has been
					 * entered, the following functions return -EPERM:
					 *
					 *   dm_clone_metadata_commit()
					 *   dm_clone_set_region_hydrated()
					 *   dm_clone_cond_set_range()
					 *   dm_clone_metadata_abort()
					 */
					void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd);
					void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd);

					/*
					 * Returns true if the hydration of the destination device is finished.
					 */
					bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd);

					/*
					 * Returns true if region @region_nr is hydrated.
					 */
					bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd,
									 unsigned long region_nr);

					/*
					 * Returns true if all the regions in the range are hydrated.
					 */
					bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
									unsigned long start, unsigned long nr_regions);

					/*
					 * Returns the number of hydrated regions.
					 */
					unsigned long dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd);

					/*
					 * Returns the first unhydrated region with region_nr >= @start.
					 */
					unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
											   unsigned long start);

					/*
					 * Get the number of free metadata blocks.
					 */
					int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
										   dm_block_t *result);

					/*
					 * Get the total number of metadata blocks.
					 */
					int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
									   dm_block_t *result);

					#endif /* DM_CLONE_METADATA_H */
 | 
				
			||||||
							
								
								
									
										2191
									
								
								drivers/md/dm-clone-target.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2191
									
								
								drivers/md/dm-clone-target.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
		Reference in a new issue