forked from mirrors/linux
		
	libfs: Add directory operations for stable offsets
Create a vector of directory operations in fs/libfs.c that handles directory seeks and readdir via stable offsets instead of the current cursor-based mechanism. For the moment these are unused. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Message-Id: <168814732984.530310.11190772066786107220.stgit@manet.1015granger.net> Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
		
							parent
							
								
									509f006932
								
							
						
					
					
						commit
						6faddda69f
					
				
					 4 changed files with 276 additions and 4 deletions
				
			
		|  | @ -85,13 +85,14 @@ prototypes:: | |||
| 			    struct dentry *dentry, struct fileattr *fa); | ||||
| 	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); | ||||
| 	struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int); | ||||
| 	struct offset_ctx *(*get_offset_ctx)(struct inode *inode); | ||||
| 
 | ||||
| locking rules: | ||||
| 	all may block | ||||
| 
 | ||||
| ==============	============================================= | ||||
| ==============	================================================== | ||||
| ops		i_rwsem(inode) | ||||
| ==============	============================================= | ||||
| ==============	================================================== | ||||
| lookup:		shared | ||||
| create:		exclusive | ||||
| link:		exclusive (both) | ||||
|  | @ -115,7 +116,8 @@ atomic_open:	shared (exclusive if O_CREAT is set in open flags) | |||
| tmpfile:	no | ||||
| fileattr_get:	no or exclusive | ||||
| fileattr_set:	exclusive | ||||
| ==============	============================================= | ||||
| get_offset_ctx  no | ||||
| ==============	================================================== | ||||
| 
 | ||||
| 
 | ||||
| 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem | ||||
|  |  | |||
|  | @ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined: | |||
| 		int (*fileattr_set)(struct mnt_idmap *idmap, | ||||
| 				    struct dentry *dentry, struct fileattr *fa); | ||||
| 		int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); | ||||
| 	        struct offset_ctx *(*get_offset_ctx)(struct inode *inode); | ||||
| 	}; | ||||
| 
 | ||||
| Again, all methods are called without any locks being held, unless | ||||
|  | @ -675,7 +676,10 @@ otherwise noted. | |||
| 	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to | ||||
| 	change miscellaneous file flags and attributes.  Callers hold | ||||
| 	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl(). | ||||
| 
 | ||||
| ``get_offset_ctx`` | ||||
| 	called to get the offset context for a directory inode. A | ||||
|         filesystem must define this operation to use | ||||
|         simple_offset_dir_operations. | ||||
| 
 | ||||
| The Address Space Object | ||||
| ======================== | ||||
|  |  | |||
							
								
								
									
										248
									
								
								fs/libfs.c
									
									
									
									
									
								
							
							
						
						
									
										248
									
								
								fs/libfs.c
									
									
									
									
									
								
							|  | @ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = { | |||
| }; | ||||
| EXPORT_SYMBOL(simple_dir_inode_operations); | ||||
| 
 | ||||
| /* Keep a directory offset in @dentry->d_fsdata, widened to pointer size. */ | ||||
| static void offset_set(struct dentry *dentry, u32 offset) | ||||
| { | ||||
| 	uintptr_t encoded = offset; | ||||
| 
 | ||||
| 	dentry->d_fsdata = (void *)encoded; | ||||
| } | ||||
| 
 | ||||
| /* Interpret the value held in @dentry->d_fsdata as a u32 directory offset. */ | ||||
| static u32 dentry2offset(struct dentry *dentry) | ||||
| { | ||||
| 	uintptr_t encoded = (uintptr_t)dentry->d_fsdata; | ||||
| 
 | ||||
| 	return (u32)encoded; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * simple_offset_init - prepare an offset_ctx for use | ||||
|  * @octx: directory offset map to be initialized | ||||
|  * | ||||
|  * Initializes the xarray inside @octx and sets the allocation cursor to | ||||
|  * the first offset that is handed out to directory entries. | ||||
|  */ | ||||
| void simple_offset_init(struct offset_ctx *octx) | ||||
| { | ||||
| 	/* Offsets 0 and 1 stand for '.' and '..'; entries begin at 2. */ | ||||
| 	octx->next_offset = 2; | ||||
| 
 | ||||
| 	xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * simple_offset_add - Add an entry to a directory's offset map | ||||
|  * @octx: directory offset ctx to be updated | ||||
|  * @dentry: new dentry being added | ||||
|  * | ||||
|  * An offset in the range [2, U32_MAX] is obtained for @dentry through | ||||
|  * xa_alloc_cyclic(), with @octx->next_offset serving as the cursor. | ||||
|  * | ||||
|  * Returns zero on success; @octx and the offset recorded in @dentry are | ||||
|  * updated. Returns -EBUSY if @dentry already carries a non-zero offset. | ||||
|  * Otherwise the negative errno from xa_alloc_cyclic() is returned. | ||||
|  */ | ||||
| int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) | ||||
| { | ||||
| 	static const struct xa_limit range = XA_LIMIT(2, U32_MAX); | ||||
| 	u32 slot; | ||||
| 	int err; | ||||
| 
 | ||||
| 	if (dentry2offset(dentry)) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	err = xa_alloc_cyclic(&octx->xa, &slot, dentry, range, | ||||
| 			      &octx->next_offset, GFP_KERNEL); | ||||
| 	if (err < 0) | ||||
| 		return err; | ||||
| 
 | ||||
| 	/* Any non-negative result counts as success. */ | ||||
| 	offset_set(dentry, slot); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * simple_offset_remove - Remove an entry from a directory's offset map | ||||
|  * @octx: directory offset ctx to be updated | ||||
|  * @dentry: dentry being removed | ||||
|  * | ||||
|  * Nothing is done when @dentry carries offset zero. Otherwise the map | ||||
|  * slot at the dentry's offset is erased and the offset recorded in the | ||||
|  * dentry is reset to zero. | ||||
|  */ | ||||
| void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) | ||||
| { | ||||
| 	u32 slot = dentry2offset(dentry); | ||||
| 
 | ||||
| 	if (slot != 0) { | ||||
| 		xa_erase(&octx->xa, slot); | ||||
| 		offset_set(dentry, 0); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * simple_offset_rename_exchange - exchange rename with directory offsets | ||||
|  * @old_dir: parent of dentry being moved | ||||
|  * @old_dentry: dentry being moved | ||||
|  * @new_dir: destination parent | ||||
|  * @new_dentry: destination dentry | ||||
|  * | ||||
|  * Both dentries are first removed from their directories' offset maps. | ||||
|  * Each is then added to the other directory's map, which assigns it a | ||||
|  * freshly allocated offset, and finally simple_rename_exchange() is | ||||
|  * called. If any of these steps fails, the offsets captured on entry are | ||||
|  * written back to both the dentries and the maps. | ||||
|  * | ||||
|  * Returns zero on success. Otherwise a negative errno is returned and the | ||||
|  * rename is rolled back. | ||||
|  */ | ||||
| int simple_offset_rename_exchange(struct inode *old_dir, | ||||
| 				  struct dentry *old_dentry, | ||||
| 				  struct inode *new_dir, | ||||
| 				  struct dentry *new_dentry) | ||||
| { | ||||
| 	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); | ||||
| 	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); | ||||
| 	u32 old_index = dentry2offset(old_dentry); /* saved for rollback */ | ||||
| 	u32 new_index = dentry2offset(new_dentry); /* saved for rollback */ | ||||
| 	int ret; | ||||
| 
 | ||||
| 	simple_offset_remove(old_ctx, old_dentry); | ||||
| 	simple_offset_remove(new_ctx, new_dentry); | ||||
| 
 | ||||
| 	ret = simple_offset_add(new_ctx, old_dentry); | ||||
| 	if (ret) | ||||
| 		goto out_restore; | ||||
| 
 | ||||
| 	ret = simple_offset_add(old_ctx, new_dentry); | ||||
| 	if (ret) { | ||||
| 		simple_offset_remove(new_ctx, old_dentry); /* undo the first add */ | ||||
| 		goto out_restore; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); | ||||
| 	if (ret) { | ||||
| 		simple_offset_remove(new_ctx, old_dentry); /* undo both adds */ | ||||
| 		simple_offset_remove(old_ctx, new_dentry); | ||||
| 		goto out_restore; | ||||
| 	} | ||||
| 	return 0; | ||||
| 
 | ||||
| out_restore: /* put each dentry back at the offset it had on entry */ | ||||
| 	offset_set(old_dentry, old_index); | ||||
| 	xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL); /* NOTE(review): result is not checked */ | ||||
| 	offset_set(new_dentry, new_index); | ||||
| 	xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL); /* NOTE(review): result is not checked */ | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * simple_offset_destroy - Release offset map | ||||
|  * @octx: directory offset ctx that is about to be destroyed | ||||
|  * | ||||
|  * During fs teardown (e.g. umount), a directory's offset map might still | ||||
|  * contain entries. xa_destroy() cleans out anything that remains. | ||||
|  * | ||||
|  * This function does nothing beyond calling xa_destroy() on @octx->xa. | ||||
|  */ | ||||
| void simple_offset_destroy(struct offset_ctx *octx) | ||||
| { | ||||
| 	xa_destroy(&octx->xa); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * offset_dir_llseek - Advance the read position of a directory descriptor | ||||
|  * @file: an open directory whose position is to be updated | ||||
|  * @offset: a byte offset | ||||
|  * @whence: enumerator describing the starting position for this update | ||||
|  * | ||||
|  * Only SEEK_SET and SEEK_CUR are accepted. SEEK_END, SEEK_DATA, SEEK_HOLE | ||||
|  * and any other @whence value are rejected with -EINVAL, as is a | ||||
|  * negative resulting position. | ||||
|  * | ||||
|  * Returns the updated read position if successful; otherwise a | ||||
|  * negative errno is returned and the read position remains unchanged. | ||||
|  */ | ||||
| static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) | ||||
| { | ||||
| 	if (whence != SEEK_SET && whence != SEEK_CUR) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/* A relative seek is turned into an absolute position first. */ | ||||
| 	if (whence == SEEK_CUR) | ||||
| 		offset += file->f_pos; | ||||
| 
 | ||||
| 	if (offset < 0) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	return vfs_setpos(file, offset, U32_MAX); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Fetch the next dentry from the iteration state @xas, bounded by | ||||
|  * U32_MAX. The dentry is returned with a reference taken by | ||||
|  * dget_dlock() when simple_positive() holds for it. NULL is returned | ||||
|  * when xas_next_entry() yields nothing, and also when the entry found | ||||
|  * does not pass simple_positive(). | ||||
|  */ | ||||
| static struct dentry *offset_find_next(struct xa_state *xas) | ||||
| { | ||||
| 	struct dentry *ref = NULL; | ||||
| 	struct dentry *candidate; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	candidate = xas_next_entry(xas, U32_MAX); | ||||
| 	if (candidate) { | ||||
| 		spin_lock_nested(&candidate->d_lock, DENTRY_D_LOCK_NESTED); | ||||
| 		if (simple_positive(candidate)) | ||||
| 			ref = dget_dlock(candidate); | ||||
| 		spin_unlock(&candidate->d_lock); | ||||
| 	} | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	return ref; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Pass one directory entry to @ctx->actor: its name, the offset kept in | ||||
|  * the dentry, and the inode number and type of its inode. Returns | ||||
|  * whatever the actor returns. | ||||
|  */ | ||||
| static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) | ||||
| { | ||||
| 	struct inode *target = d_inode(dentry); | ||||
| 	u32 pos = dentry2offset(dentry); | ||||
| 
 | ||||
| 	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, pos, | ||||
| 			  target->i_ino, fs_umode_to_dtype(target->i_mode)); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Emit entries of @dir starting from the offset in @ctx->pos. The walk | ||||
|  * ends when the offset map yields no further dentry or when emitting an | ||||
|  * entry fails. After each emitted entry, @ctx->pos is set to one past | ||||
|  * that entry's index; it is left untouched for an entry that was not | ||||
|  * emitted. | ||||
|  */ | ||||
| static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx) | ||||
| { | ||||
| 	struct inode *dir_inode = d_inode(dir); | ||||
| 	struct offset_ctx *octx = dir_inode->i_op->get_offset_ctx(dir_inode); | ||||
| 	XA_STATE(xas, &octx->xa, ctx->pos); | ||||
| 	struct dentry *child; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		bool emitted; | ||||
| 
 | ||||
| 		/* The lookup runs with the parent's d_lock held. */ | ||||
| 		spin_lock(&dir->d_lock); | ||||
| 		child = offset_find_next(&xas); | ||||
| 		spin_unlock(&dir->d_lock); | ||||
| 		if (!child) | ||||
| 			return; | ||||
| 
 | ||||
| 		/* Drop the reference whether or not the entry was emitted. */ | ||||
| 		emitted = offset_dir_emit(ctx, child); | ||||
| 		dput(child); | ||||
| 		if (!emitted) | ||||
| 			return; | ||||
| 
 | ||||
| 		ctx->pos = xas.xa_index + 1; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * offset_readdir - Emit entries starting at offset @ctx->pos | ||||
|  * @file: an open directory to iterate over | ||||
|  * @ctx: directory iteration context | ||||
|  * | ||||
|  * Caller must hold @file's i_rwsem to prevent insertion or removal of | ||||
|  * entries during this call. | ||||
|  * | ||||
|  * On entry, @ctx->pos contains an offset that represents the first entry | ||||
|  * to be read from the directory. | ||||
|  * | ||||
|  * The '.' and '..' entries are handled by dir_emit_dots(). The remaining | ||||
|  * entries are then emitted until there are no more entries to read, or | ||||
|  * until the ctx->actor indicates there is no more space in the caller's | ||||
|  * output buffer. | ||||
|  * | ||||
|  * On return, @ctx->pos contains an offset that will read the next entry | ||||
|  * in this directory when offset_readdir() is called again with @ctx. | ||||
|  * | ||||
|  * Return values: | ||||
|  *   %0 - Complete | ||||
|  */ | ||||
| static int offset_readdir(struct file *file, struct dir_context *ctx) | ||||
| { | ||||
| 	struct dentry *dir = file->f_path.dentry; | ||||
| 
 | ||||
| 	lockdep_assert_held(&d_inode(dir)->i_rwsem); | ||||
| 
 | ||||
| 	/* Only walk the offset map once the dot entries went out. */ | ||||
| 	if (dir_emit_dots(file, ctx)) | ||||
| 		offset_iterate_dir(dir, ctx); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * File operations for directories that use stable offsets: seeking goes | ||||
|  * through offset_dir_llseek() and iteration through offset_readdir(). | ||||
|  */ | ||||
| const struct file_operations simple_offset_dir_operations = { | ||||
| 	.read		= generic_read_dir, | ||||
| 	.llseek		= offset_dir_llseek, | ||||
| 	.iterate_shared	= offset_readdir, | ||||
| 	.fsync		= noop_fsync, | ||||
| }; | ||||
| 
 | ||||
| static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) | ||||
| { | ||||
| 	struct dentry *child = NULL; | ||||
|  |  | |||
|  | @ -1770,6 +1770,7 @@ struct dir_context { | |||
| 
 | ||||
| struct iov_iter; | ||||
| struct io_uring_cmd; | ||||
| struct offset_ctx; | ||||
| 
 | ||||
| struct file_operations { | ||||
| 	struct module *owner; | ||||
|  | @ -1857,6 +1858,7 @@ struct inode_operations { | |||
| 	int (*fileattr_set)(struct mnt_idmap *idmap, | ||||
| 			    struct dentry *dentry, struct fileattr *fa); | ||||
| 	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); | ||||
| 	struct offset_ctx *(*get_offset_ctx)(struct inode *inode); | ||||
| } ____cacheline_aligned; | ||||
| 
 | ||||
| static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, | ||||
|  | @ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, | |||
| extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, | ||||
| 		const void __user *from, size_t count); | ||||
| 
 | ||||
| struct offset_ctx { /* state returned by ->get_offset_ctx() */ | ||||
| 	struct xarray		xa; /* maps a directory offset to its dentry */ | ||||
| 	u32			next_offset; /* cursor passed to xa_alloc_cyclic() */ | ||||
| }; | ||||
| 
 | ||||
| void simple_offset_init(struct offset_ctx *octx); | ||||
| int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); | ||||
| void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); | ||||
| int simple_offset_rename_exchange(struct inode *old_dir, | ||||
| 				  struct dentry *old_dentry, | ||||
| 				  struct inode *new_dir, | ||||
| 				  struct dentry *new_dentry); | ||||
| void simple_offset_destroy(struct offset_ctx *octx); | ||||
| 
 | ||||
| extern const struct file_operations simple_offset_dir_operations; | ||||
| 
 | ||||
| extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); | ||||
| extern int generic_file_fsync(struct file *, loff_t, loff_t, int); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Chuck Lever
						Chuck Lever