mirror of
https://github.com/torvalds/linux.git
synced 2025-11-03 10:10:33 +02:00
Pull the big VFS changes from Al Viro:
"This one is *big* and changes quite a few things around VFS. What's in there:
- the first of two really major architecture changes - death to open
intents.
The former is finally there; it was very long in making, but with
Miklos getting through really hard and messy final push in
fs/namei.c, we finally have it. Unlike his variant, this one
doesn't introduce struct opendata; what we have instead is
->atomic_open() taking preallocated struct file * and passing
everything via its fields.
Instead of returning struct file *, it returns -E... on error, 0
on success and 1 in "deal with it yourself" case (e.g. symlink
found on server, etc.).
See comments before fs/namei.c:atomic_open(). That made a lot of
goodies finally possible and quite a few are in that pile:
->lookup(), ->d_revalidate() and ->create() do not get struct
nameidata * anymore; ->lookup() and ->d_revalidate() get lookup
flags instead, ->create() gets "do we want it exclusive" flag.
With the introduction of new helper (kern_path_locked()) we are rid
of all struct nameidata instances outside of fs/namei.c; it's still
visible in namei.h, but not for long. Come the next cycle,
declaration will move either to fs/internal.h or to fs/namei.c
itself. [me, miklos, hch]
- The second major change: behaviour of final fput(). Now we have
__fput() done without any locks held by caller *and* not from deep
in call stack.
That obviously lifts a lot of constraints on the locking in there.
Moreover, it's legal now to call fput() from atomic contexts (which
has immediately simplified life for aio.c). We also don't need
anti-recursion logic in __scm_destroy() anymore.
There is a price, though - the damn thing has become partially
asynchronous. For fput() from normal process we are guaranteed
that pending __fput() will be done before the caller returns to
userland, exits or gets stopped for ptrace.
For kernel threads and atomic contexts it's done via
schedule_work(), so theoretically we might need a way to make sure
it's finished; so far only one such place had been found, but there
might be more.
There's flush_delayed_fput() (do all pending __fput()) and there's
__fput_sync() (fput() analog doing __fput() immediately). I hope
we won't need them often; see warnings in fs/file_table.c for
details. [me, based on task_work series from Oleg merged last
cycle]
- sync series from Jan
- large part of "death to sync_supers()" work from Artem; the only
bits missing here are exofs and ext4 ones. As far as I understand,
those are going via the exofs and ext4 trees resp.; once they are
in, we can put ->write_super() to rest, along with the thread
calling it.
- preparatory bits from unionmount series (from dhowells).
- assorted cleanups and fixes all over the place, as usual.
This is not the last pile for this cycle; there's at least jlayton's
ESTALE work and fsfreeze series (the latter - in dire need of fixes,
so I'm not sure it'll make the cut this cycle). I'll probably throw
symlink/hardlink restrictions stuff from Kees into the next pile, too.
Plus there's a lot of misc patches I hadn't thrown into that one -
it's large enough as it is..."
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (127 commits)
ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file()
btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file()
switch dentry_open() to struct path, make it grab references itself
spufs: shift dget/mntget towards dentry_open()
zoran: don't bother with struct file * in zoran_map
ecryptfs: don't reinvent the wheels, please - use struct completion
don't expose I_NEW inodes via dentry->d_inode
tidy up namei.c a bit
unobfuscate follow_up() a bit
ext3: pass custom EOF to generic_file_llseek_size()
ext4: use core vfs llseek code for dir seeks
vfs: allow custom EOF in generic_file_llseek code
vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes
vfs: Remove unnecessary flushing of block devices
vfs: Make sys_sync writeout also block device inodes
vfs: Create function for iterating over block devices
vfs: Reorder operations during sys_sync
quota: Move quota syncing to ->sync_fs method
quota: Split dquot_quota_sync() to writeback and cache flushing part
vfs: Move noop_backing_dev_info check from sync into writeback
...
515 lines
12 KiB
C
515 lines
12 KiB
C
/*
|
|
* linux/fs/ext4/ioctl.c
|
|
*
|
|
* Copyright (C) 1993, 1994, 1995
|
|
* Remy Card (card@masi.ibp.fr)
|
|
* Laboratoire MASI - Institut Blaise Pascal
|
|
* Universite Pierre et Marie Curie (Paris VI)
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/jbd2.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/time.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/file.h>
|
|
#include <asm/uaccess.h>
|
|
#include "ext4_jbd2.h"
|
|
#include "ext4.h"
|
|
|
|
/*
 * Largest value that fits in 32 bits (2^32 - 1).  EXT4_IOC_RESIZE_FS
 * compares the requested block count against it to reject sizes that
 * need 64-bit block numbers on a filesystem without the 64BIT feature.
 */
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
|
|
|
|
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct inode *inode = filp->f_dentry->d_inode;
|
|
struct super_block *sb = inode->i_sb;
|
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
|
unsigned int flags;
|
|
|
|
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
|
|
|
|
switch (cmd) {
|
|
case EXT4_IOC_GETFLAGS:
|
|
ext4_get_inode_flags(ei);
|
|
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
|
|
return put_user(flags, (int __user *) arg);
|
|
case EXT4_IOC_SETFLAGS: {
|
|
handle_t *handle = NULL;
|
|
int err, migrate = 0;
|
|
struct ext4_iloc iloc;
|
|
unsigned int oldflags, mask, i;
|
|
unsigned int jflag;
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
if (get_user(flags, (int __user *) arg))
|
|
return -EFAULT;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
|
|
flags = ext4_mask_flags(inode->i_mode, flags);
|
|
|
|
err = -EPERM;
|
|
mutex_lock(&inode->i_mutex);
|
|
/* Is it quota file? Do not allow user to mess with it */
|
|
if (IS_NOQUOTA(inode))
|
|
goto flags_out;
|
|
|
|
oldflags = ei->i_flags;
|
|
|
|
/* The JOURNAL_DATA flag is modifiable only by root */
|
|
jflag = flags & EXT4_JOURNAL_DATA_FL;
|
|
|
|
/*
|
|
* The IMMUTABLE and APPEND_ONLY flags can only be changed by
|
|
* the relevant capability.
|
|
*
|
|
* This test looks nicer. Thanks to Pauline Middelink
|
|
*/
|
|
if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
|
|
if (!capable(CAP_LINUX_IMMUTABLE))
|
|
goto flags_out;
|
|
}
|
|
|
|
/*
|
|
* The JOURNAL_DATA flag can only be changed by
|
|
* the relevant capability.
|
|
*/
|
|
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
|
|
if (!capable(CAP_SYS_RESOURCE))
|
|
goto flags_out;
|
|
}
|
|
if (oldflags & EXT4_EXTENTS_FL) {
|
|
/* We don't support clearning extent flags */
|
|
if (!(flags & EXT4_EXTENTS_FL)) {
|
|
err = -EOPNOTSUPP;
|
|
goto flags_out;
|
|
}
|
|
} else if (flags & EXT4_EXTENTS_FL) {
|
|
/* migrate the file */
|
|
migrate = 1;
|
|
flags &= ~EXT4_EXTENTS_FL;
|
|
}
|
|
|
|
if (flags & EXT4_EOFBLOCKS_FL) {
|
|
/* we don't support adding EOFBLOCKS flag */
|
|
if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
|
|
err = -EOPNOTSUPP;
|
|
goto flags_out;
|
|
}
|
|
} else if (oldflags & EXT4_EOFBLOCKS_FL)
|
|
ext4_truncate(inode);
|
|
|
|
handle = ext4_journal_start(inode, 1);
|
|
if (IS_ERR(handle)) {
|
|
err = PTR_ERR(handle);
|
|
goto flags_out;
|
|
}
|
|
if (IS_SYNC(inode))
|
|
ext4_handle_sync(handle);
|
|
err = ext4_reserve_inode_write(handle, inode, &iloc);
|
|
if (err)
|
|
goto flags_err;
|
|
|
|
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
|
|
if (!(mask & EXT4_FL_USER_MODIFIABLE))
|
|
continue;
|
|
if (mask & flags)
|
|
ext4_set_inode_flag(inode, i);
|
|
else
|
|
ext4_clear_inode_flag(inode, i);
|
|
}
|
|
|
|
ext4_set_inode_flags(inode);
|
|
inode->i_ctime = ext4_current_time(inode);
|
|
|
|
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
|
|
flags_err:
|
|
ext4_journal_stop(handle);
|
|
if (err)
|
|
goto flags_out;
|
|
|
|
if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
|
|
err = ext4_change_inode_journal_flag(inode, jflag);
|
|
if (err)
|
|
goto flags_out;
|
|
if (migrate)
|
|
err = ext4_ext_migrate(inode);
|
|
flags_out:
|
|
mutex_unlock(&inode->i_mutex);
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_GETVERSION:
|
|
case EXT4_IOC_GETVERSION_OLD:
|
|
return put_user(inode->i_generation, (int __user *) arg);
|
|
case EXT4_IOC_SETVERSION:
|
|
case EXT4_IOC_SETVERSION_OLD: {
|
|
handle_t *handle;
|
|
struct ext4_iloc iloc;
|
|
__u32 generation;
|
|
int err;
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EPERM;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
|
|
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
|
|
ext4_warning(sb, "Setting inode version is not "
|
|
"supported with metadata_csum enabled.");
|
|
return -ENOTTY;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
if (get_user(generation, (int __user *) arg)) {
|
|
err = -EFAULT;
|
|
goto setversion_out;
|
|
}
|
|
|
|
mutex_lock(&inode->i_mutex);
|
|
handle = ext4_journal_start(inode, 1);
|
|
if (IS_ERR(handle)) {
|
|
err = PTR_ERR(handle);
|
|
goto unlock_out;
|
|
}
|
|
err = ext4_reserve_inode_write(handle, inode, &iloc);
|
|
if (err == 0) {
|
|
inode->i_ctime = ext4_current_time(inode);
|
|
inode->i_generation = generation;
|
|
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
|
|
}
|
|
ext4_journal_stop(handle);
|
|
|
|
unlock_out:
|
|
mutex_unlock(&inode->i_mutex);
|
|
setversion_out:
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_GROUP_EXTEND: {
|
|
ext4_fsblk_t n_blocks_count;
|
|
int err, err2=0;
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
if (get_user(n_blocks_count, (__u32 __user *)arg)) {
|
|
err = -EFAULT;
|
|
goto group_extend_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not supported with bigalloc");
|
|
err = -EOPNOTSUPP;
|
|
goto group_extend_out;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto group_extend_out;
|
|
|
|
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
group_extend_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_MOVE_EXT: {
|
|
struct move_extent me;
|
|
struct file *donor_filp;
|
|
int err;
|
|
|
|
if (!(filp->f_mode & FMODE_READ) ||
|
|
!(filp->f_mode & FMODE_WRITE))
|
|
return -EBADF;
|
|
|
|
if (copy_from_user(&me,
|
|
(struct move_extent __user *)arg, sizeof(me)))
|
|
return -EFAULT;
|
|
me.moved_len = 0;
|
|
|
|
donor_filp = fget(me.donor_fd);
|
|
if (!donor_filp)
|
|
return -EBADF;
|
|
|
|
if (!(donor_filp->f_mode & FMODE_WRITE)) {
|
|
err = -EBADF;
|
|
goto mext_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online defrag not supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto mext_out;
|
|
|
|
err = ext4_move_extents(filp, donor_filp, me.orig_start,
|
|
me.donor_start, me.len, &me.moved_len);
|
|
mnt_drop_write_file(filp);
|
|
|
|
if (copy_to_user((struct move_extent __user *)arg,
|
|
&me, sizeof(me)))
|
|
err = -EFAULT;
|
|
mext_out:
|
|
fput(donor_filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_GROUP_ADD: {
|
|
struct ext4_new_group_data input;
|
|
int err, err2=0;
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
|
|
sizeof(input))) {
|
|
err = -EFAULT;
|
|
goto group_add_out;
|
|
}
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not supported with bigalloc");
|
|
err = -EOPNOTSUPP;
|
|
goto group_add_out;
|
|
}
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto group_add_out;
|
|
|
|
err = ext4_group_add(sb, &input);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
group_add_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_MIGRATE:
|
|
{
|
|
int err;
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
/*
|
|
* inode_mutex prevent write and truncate on the file.
|
|
* Read still goes through. We take i_data_sem in
|
|
* ext4_ext_swap_inode_data before we switch the
|
|
* inode format to prevent read.
|
|
*/
|
|
mutex_lock(&(inode->i_mutex));
|
|
err = ext4_ext_migrate(inode);
|
|
mutex_unlock(&(inode->i_mutex));
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_ALLOC_DA_BLKS:
|
|
{
|
|
int err;
|
|
if (!inode_owner_or_capable(inode))
|
|
return -EACCES;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
return err;
|
|
err = ext4_alloc_da_blocks(inode);
|
|
mnt_drop_write_file(filp);
|
|
return err;
|
|
}
|
|
|
|
case EXT4_IOC_RESIZE_FS: {
|
|
ext4_fsblk_t n_blocks_count;
|
|
struct super_block *sb = inode->i_sb;
|
|
int err = 0, err2 = 0;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not (yet) supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_INCOMPAT_META_BG)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"Online resizing not (yet) supported with meta_bg");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
|
|
sizeof(__u64))) {
|
|
return -EFAULT;
|
|
}
|
|
|
|
if (n_blocks_count > MAX_32_NUM &&
|
|
!EXT4_HAS_INCOMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_INCOMPAT_64BIT)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"File system only supports 32-bit block numbers");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
err = ext4_resize_begin(sb);
|
|
if (err)
|
|
return err;
|
|
|
|
err = mnt_want_write_file(filp);
|
|
if (err)
|
|
goto resizefs_out;
|
|
|
|
err = ext4_resize_fs(sb, n_blocks_count);
|
|
if (EXT4_SB(sb)->s_journal) {
|
|
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
|
|
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
|
|
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
|
|
}
|
|
if (err == 0)
|
|
err = err2;
|
|
mnt_drop_write_file(filp);
|
|
resizefs_out:
|
|
ext4_resize_end(sb);
|
|
return err;
|
|
}
|
|
|
|
case FITRIM:
|
|
{
|
|
struct request_queue *q = bdev_get_queue(sb->s_bdev);
|
|
struct fstrim_range range;
|
|
int ret = 0;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
if (!blk_queue_discard(q))
|
|
return -EOPNOTSUPP;
|
|
|
|
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
|
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
|
ext4_msg(sb, KERN_ERR,
|
|
"FITRIM not supported with bigalloc");
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
|
|
sizeof(range)))
|
|
return -EFAULT;
|
|
|
|
range.minlen = max((unsigned int)range.minlen,
|
|
q->limits.discard_granularity);
|
|
ret = ext4_trim_fs(sb, &range);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
if (copy_to_user((struct fstrim_range __user *)arg, &range,
|
|
sizeof(range)))
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
/* These are just misnamed, they actually get/put from/to user an int */
|
|
switch (cmd) {
|
|
case EXT4_IOC32_GETFLAGS:
|
|
cmd = EXT4_IOC_GETFLAGS;
|
|
break;
|
|
case EXT4_IOC32_SETFLAGS:
|
|
cmd = EXT4_IOC_SETFLAGS;
|
|
break;
|
|
case EXT4_IOC32_GETVERSION:
|
|
cmd = EXT4_IOC_GETVERSION;
|
|
break;
|
|
case EXT4_IOC32_SETVERSION:
|
|
cmd = EXT4_IOC_SETVERSION;
|
|
break;
|
|
case EXT4_IOC32_GROUP_EXTEND:
|
|
cmd = EXT4_IOC_GROUP_EXTEND;
|
|
break;
|
|
case EXT4_IOC32_GETVERSION_OLD:
|
|
cmd = EXT4_IOC_GETVERSION_OLD;
|
|
break;
|
|
case EXT4_IOC32_SETVERSION_OLD:
|
|
cmd = EXT4_IOC_SETVERSION_OLD;
|
|
break;
|
|
case EXT4_IOC32_GETRSVSZ:
|
|
cmd = EXT4_IOC_GETRSVSZ;
|
|
break;
|
|
case EXT4_IOC32_SETRSVSZ:
|
|
cmd = EXT4_IOC_SETRSVSZ;
|
|
break;
|
|
case EXT4_IOC32_GROUP_ADD: {
|
|
struct compat_ext4_new_group_input __user *uinput;
|
|
struct ext4_new_group_input input;
|
|
mm_segment_t old_fs;
|
|
int err;
|
|
|
|
uinput = compat_ptr(arg);
|
|
err = get_user(input.group, &uinput->group);
|
|
err |= get_user(input.block_bitmap, &uinput->block_bitmap);
|
|
err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
|
|
err |= get_user(input.inode_table, &uinput->inode_table);
|
|
err |= get_user(input.blocks_count, &uinput->blocks_count);
|
|
err |= get_user(input.reserved_blocks,
|
|
&uinput->reserved_blocks);
|
|
if (err)
|
|
return -EFAULT;
|
|
old_fs = get_fs();
|
|
set_fs(KERNEL_DS);
|
|
err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
|
|
(unsigned long) &input);
|
|
set_fs(old_fs);
|
|
return err;
|
|
}
|
|
case EXT4_IOC_MOVE_EXT:
|
|
case FITRIM:
|
|
case EXT4_IOC_RESIZE_FS:
|
|
break;
|
|
default:
|
|
return -ENOIOCTLCMD;
|
|
}
|
|
return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
|
|
}
|
|
#endif
|