With all the preparation patches, we're able to finally enable btrfs block size (sector size) larger than page size support and give it a full fstests run.

This new feature is hidden behind the experimental flag and should not be considered a core feature yet, as btrfs' default block size is still 4K. But it is a feature that will shine in the future, when 16K block size devices are widely adopted.

For now some features are explicitly disabled:

- Direct IO
  This is the most complex part to support. The root cause is that we cannot control the pages of the iov_iter passed in. User space programs can only ensure the virtual addresses are contiguous, but have no control over their physical addresses. Our bs > ps support heavily relies on large folios, and direct IO memory can easily break that assumption. So direct IO is disabled and will always fall back to buffered IO.

- RAID56
  In theory we could convert RAID56 to use large folios, but it would need to be converted back to page based if we want to support direct IO in the future. So just reject it for now.

- Encoded send
- Encoded read
  Both utilize btrfs_encoded_read_regular_fill_pages(), and send uses vmalloced memory. Unfortunately, for vmalloced memory we cannot guarantee the minimal folio order. Send will simply always fall back to regular writes, which read from the page cache and follow the existing folio order requirement.

- Encoded write
  Encoded write allocates its own pages, and we could easily change it to follow the minimal order. But since encoded read is already disabled, there is no point in enabling encoded write alone.

Finally, just like what we did for bs < ps support in the past, add a warning message for bs > ps mounts.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
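As a rough illustration (not part of the patch itself), the mount-time warning described above could look something like the sketch below; the helper name, call site and exact message are hypothetical:

static void warn_on_bs_gt_ps(struct btrfs_fs_info *fs_info, u32 sectorsize)
{
	/* Hypothetical example: warn at mount time when block size > page size. */
	if (sectorsize > PAGE_SIZE)
		btrfs_warn(fs_info,
			   "block size %u is larger than page size %lu, this is an experimental feature",
			   sectorsize, PAGE_SIZE);
}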
// SPDX-License-Identifier: GPL-2.0

#include "messages.h"
#include "fs.h"
#include "accessors.h"
#include "volumes.h"

static const struct btrfs_csums {
	u16 size;
	const char name[10];
	const char driver[12];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
				     .driver = "blake2b-256" },
};

/* This exists for btrfs-progs usages. */
u16 btrfs_csum_type_size(u16 type)
{
	return btrfs_csums[type].size;
}

int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
	u16 t = btrfs_super_csum_type(s);

	/* csum type is validated at mount time. */
	return btrfs_csum_type_size(t);
}

const char *btrfs_super_csum_name(u16 csum_type)
{
	/* csum type is validated at mount time. */
	return btrfs_csums[csum_type].name;
}

/*
 * Return driver name if defined, otherwise the name that's also a valid driver
 * name.
 */
const char *btrfs_super_csum_driver(u16 csum_type)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].driver[0] ?
		btrfs_csums[csum_type].driver :
		btrfs_csums[csum_type].name;
}
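
/*
 * Illustrative sketch, not part of this file: the driver name returned by
 * btrfs_super_csum_driver() is meant to be handed to the crypto API when the
 * checksum transform is allocated (needs <crypto/hash.h>). The helper name
 * here is made up for the example.
 */
static inline struct crypto_shash *example_alloc_csum_shash(u16 csum_type)
{
	/* e.g. "crc32c", or "blake2b-256" for the BLAKE2 entry above. */
	return crypto_alloc_shash(btrfs_super_csum_driver(csum_type), 0, 0);
}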

size_t __attribute_const__ btrfs_get_num_csums(void)
{
	return ARRAY_SIZE(btrfs_csums);
}

/*
 * We support the following block sizes for all systems:
 *
 * - 4K
 *   This is the most common block size. For PAGE_SIZE > 4K cases the subpage
 *   mode is used.
 *
 * - PAGE_SIZE
 *   The straightforward block size to support.
 *
 * And extra support for the following block sizes based on the kernel config:
 *
 * - MIN_BLOCKSIZE
 *   This is either 4K (regular builds) or 2K (debug builds)
 *   This allows testing subpage routines on x86_64.
 */
bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize)
{
	/* @blocksize should be validated first. */
	ASSERT(is_power_of_2(blocksize) && blocksize >= BTRFS_MIN_BLOCKSIZE &&
	       blocksize <= BTRFS_MAX_BLOCKSIZE);

	if (blocksize == PAGE_SIZE || blocksize == SZ_4K || blocksize == BTRFS_MIN_BLOCKSIZE)
		return true;
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/*
	 * For bs > ps support it's done by specifying a minimal folio order
	 * for filemap, thus implying large data folios.
	 * For HIGHMEM systems, we can not always access the content of a (large)
	 * folio in one go, but go through them page by page.
	 *
	 * A lot of features don't implement a proper PAGE sized loop for large
	 * folios, this includes:
	 *
	 * - compression
	 * - verity
	 * - encoded write
	 *
	 * Considering HIGHMEM is such a pain to deal with and it's going
	 * to be deprecated eventually, just reject HIGHMEM && bs > ps cases.
	 */
	if (IS_ENABLED(CONFIG_HIGHMEM) && blocksize > PAGE_SIZE)
		return false;
	return true;
#endif
	return false;
}
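
/*
 * Illustrative sketch, not part of this file: per the assertion above, the
 * caller has to do the basic sanity checks itself before asking whether a
 * block size is supported. The helper name is made up for the example.
 */
static inline bool example_blocksize_valid_and_supported(u32 blocksize)
{
	if (!is_power_of_2(blocksize) || blocksize < BTRFS_MIN_BLOCKSIZE ||
	    blocksize > BTRFS_MAX_BLOCKSIZE)
		return false;
	return btrfs_supported_blocksize(blocksize);
}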

/*
 * Start exclusive operation @type, return true on success.
 */
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type)
{
	bool ret = false;

	spin_lock(&fs_info->super_lock);
	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
		fs_info->exclusive_operation = type;
		ret = true;
	}
	spin_unlock(&fs_info->super_lock);

	return ret;
}

/*
 * Conditionally allow to enter the exclusive operation in case it's compatible
 * with the running one. This must be paired with btrfs_exclop_start_unlock()
 * and btrfs_exclop_finish().
 *
 * Compatibility:
 * - the same type is already running
 * - when trying to add a device and balance has been paused
 * - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
 *   must check the condition first that would allow none -> @type
 */
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
				 enum btrfs_exclusive_operation type)
{
	spin_lock(&fs_info->super_lock);
	if (fs_info->exclusive_operation == type ||
	    (fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED &&
	     type == BTRFS_EXCLOP_DEV_ADD))
		return true;

	spin_unlock(&fs_info->super_lock);
	return false;
}

void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
{
	spin_unlock(&fs_info->super_lock);
}
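
/*
 * Illustrative sketch, not part of this file: how a caller is expected to
 * pair btrfs_exclop_start_try_lock() with btrfs_exclop_start_unlock() and
 * btrfs_exclop_finish(). The helper name and the work done in the middle are
 * made up for the example.
 */
static inline int example_try_exclusive_dev_add(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
		return -EBUSY;

	/*
	 * On success super_lock is still held, so the caller can inspect or
	 * update fs_info->exclusive_operation before releasing it.
	 */
	btrfs_exclop_start_unlock(fs_info);

	/* ... do the exclusive work here ... */

	btrfs_exclop_finish(fs_info);
	return 0;
}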

void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->super_lock);
	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
	spin_unlock(&fs_info->super_lock);
	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
}

void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
			  enum btrfs_exclusive_operation op)
{
	switch (op) {
	case BTRFS_EXCLOP_BALANCE_PAUSED:
		spin_lock(&fs_info->super_lock);
		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
		       fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
		spin_unlock(&fs_info->super_lock);
		break;
	case BTRFS_EXCLOP_BALANCE:
		spin_lock(&fs_info->super_lock);
		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
		spin_unlock(&fs_info->super_lock);
		break;
	default:
		btrfs_warn(fs_info,
			   "invalid exclop balance operation %d requested", op);
	}
}

void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			     const char *name)
{
	struct btrfs_super_block *disk_super;
	u64 features;

	disk_super = fs_info->super_copy;
	features = btrfs_super_incompat_flags(disk_super);
	if (!(features & flag)) {
		spin_lock(&fs_info->super_lock);
		features = btrfs_super_incompat_flags(disk_super);
		if (!(features & flag)) {
			features |= flag;
			btrfs_set_super_incompat_flags(disk_super, features);
			btrfs_info(fs_info,
				   "setting incompat feature flag for %s (0x%llx)",
				   name, flag);
		}
		spin_unlock(&fs_info->super_lock);
		set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
	}
}
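
/*
 * Note (not part of this file): callers normally do not pass the raw flag and
 * name themselves; a wrapper macro in fs.h, btrfs_set_fs_incompat(), expands a
 * feature name into both, e.g.:
 *
 *	btrfs_set_fs_incompat(fs_info, RAID56);
 *
 * which ends up calling
 * __btrfs_set_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_RAID56, "RAID56").
 */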

void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag,
			       const char *name)
{
	struct btrfs_super_block *disk_super;
	u64 features;

	disk_super = fs_info->super_copy;
	features = btrfs_super_incompat_flags(disk_super);
	if (features & flag) {
		spin_lock(&fs_info->super_lock);
		features = btrfs_super_incompat_flags(disk_super);
		if (features & flag) {
			features &= ~flag;
			btrfs_set_super_incompat_flags(disk_super, features);
			btrfs_info(fs_info,
				   "clearing incompat feature flag for %s (0x%llx)",
				   name, flag);
		}
		spin_unlock(&fs_info->super_lock);
		set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
	}
}

void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
			      const char *name)
{
	struct btrfs_super_block *disk_super;
	u64 features;

	disk_super = fs_info->super_copy;
	features = btrfs_super_compat_ro_flags(disk_super);
	if (!(features & flag)) {
		spin_lock(&fs_info->super_lock);
		features = btrfs_super_compat_ro_flags(disk_super);
		if (!(features & flag)) {
			features |= flag;
			btrfs_set_super_compat_ro_flags(disk_super, features);
			btrfs_info(fs_info,
				   "setting compat-ro feature flag for %s (0x%llx)",
				   name, flag);
		}
		spin_unlock(&fs_info->super_lock);
		set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
	}
}

void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
				const char *name)
{
	struct btrfs_super_block *disk_super;
	u64 features;

	disk_super = fs_info->super_copy;
	features = btrfs_super_compat_ro_flags(disk_super);
	if (features & flag) {
		spin_lock(&fs_info->super_lock);
		features = btrfs_super_compat_ro_flags(disk_super);
		if (features & flag) {
			features &= ~flag;
			btrfs_set_super_compat_ro_flags(disk_super, features);
			btrfs_info(fs_info,
				   "clearing compat-ro feature flag for %s (0x%llx)",
				   name, flag);
		}
		spin_unlock(&fs_info->super_lock);
		set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags);
	}
}