Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement FIDEDUPERANGE for Linux. #15393

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,18 +184,28 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
}
#endif /* HAVE_VFS_ITERATE */


typedef struct zfs_locked_range zfs_locked_range_t;
/* zpl_file_range.c */

/* handlers for file_operations of the same name */
extern ssize_t zpl_copy_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags);
extern loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags);
extern int zpl_clone_file_range(struct file *src_file, loff_t src_off,
extern ssize_t
zpl_copy_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, size_t len,
unsigned int flags);
extern loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, loff_t len,
unsigned int flags);
extern int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, uint64_t len);
extern int zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
extern int
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, uint64_t len);
extern int
zpl_dedupe_file_compare_locked(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, uint64_t len, bool *is_same,
zfs_locked_range_t *src_zlr, zfs_locked_range_t *dst_zlr);

/* compat for FICLONE/FICLONERANGE/FIDEDUPERANGE ioctls */
typedef struct {
Expand Down
5 changes: 5 additions & 0 deletions include/sys/zfs_vnops.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,19 @@
#ifndef _SYS_FS_ZFS_VNOPS_H
#define _SYS_FS_ZFS_VNOPS_H
#include <sys/zfs_vnops_os.h>
typedef struct zfs_locked_range zfs_locked_range_t;

extern int zfs_fsync(znode_t *, int, cred_t *);
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_read_locked(znode_t *, zfs_uio_t *, int, cred_t *,
zfs_locked_range_t *);
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
extern int zfs_holey(znode_t *, ulong_t, loff_t *);
extern int zfs_access(znode_t *, int, int, cred_t *);
extern int zfs_clone_range(znode_t *, uint64_t *, znode_t *, uint64_t *,
uint64_t *, cred_t *);
extern int zfs_clone_range_locked(znode_t *, uint64_t *, znode_t *, uint64_t *,
uint64_t *, cred_t *, zfs_locked_range_t *, zfs_locked_range_t *);
extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t,
const blkptr_t *, size_t);

Expand Down
259 changes: 239 additions & 20 deletions module/os/linux/zfs/zpl_file_range.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,55 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
return ((ssize_t)len_o);
}

#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
/*
 * Block-clone len bytes from src_off in src_file to dst_off in dst_file
 * for a caller that has already taken the range locks.
 *
 * The caller must hold a read lock on the source range (src_zlr) and a
 * write lock on the destination range (dst_zlr); both are handed through
 * to zfs_clone_range_locked() unchanged.
 *
 * File offsets are not updated here; the kernel will take care of that
 * depending on how it was called.
 *
 * Returns the number of bytes reported back by zfs_clone_range_locked(),
 * -EOPNOTSUPP when the block cloning feature is not enabled on the
 * destination's pool, or another negative errno on failure.
 */
static ssize_t
__zpl_clone_file_range_locked(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, size_t len,
    zfs_locked_range_t *src_zlr, zfs_locked_range_t *dst_zlr)
{
	struct inode *from_ip = file_inode(src_file);
	struct inode *to_ip = file_inode(dst_file);
	cred_t *cred = CRED();
	fstrans_cookie_t fstrans;
	/* In/out copies: zfs_clone_range_locked() takes these by pointer */
	uint64_t in_off = (uint64_t)src_off;
	uint64_t out_off = (uint64_t)dst_off;
	uint64_t nbytes = (uint64_t)len;
	int rc;

	if (!spa_feature_is_enabled(dmu_objset_spa(ITOZSB(to_ip)->z_os),
	    SPA_FEATURE_BLOCK_CLONING)) {
		return (-EOPNOTSUPP);
	}

	/* Source is locked shared, but only when it is a different inode */
	if (from_ip != to_ip) {
		spl_inode_lock_shared(from_ip);
	}
	spl_inode_lock(to_ip);

	crhold(cred);
	fstrans = spl_fstrans_mark();

	rc = -zfs_clone_range_locked(ITOZ(from_ip), &in_off, ITOZ(to_ip),
	    &out_off, &nbytes, cred, src_zlr, dst_zlr);

	spl_fstrans_unmark(fstrans);
	crfree(cred);

	/* Release in the reverse order of acquisition */
	spl_inode_unlock(to_ip);
	if (from_ip != to_ip) {
		spl_inode_unlock_shared(from_ip);
	}

	if (rc < 0) {
		return (rc);
	}

	return ((ssize_t)nbytes);
}

#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
* Entry point for copy_file_range(). Copy len bytes from src_off in src_file
Expand Down Expand Up @@ -147,35 +195,73 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off,
* doesn't do that, so we just turn the flag off.
*/
flags &= ~REMAP_FILE_CAN_SHORTEN;

if (flags & REMAP_FILE_DEDUP)
/* No support for dedup yet */
return (-EOPNOTSUPP);

/* Zero length means to clone everything to the end of the file */
if (flags & REMAP_FILE_DEDUP) {
/* Both nodes must be range locked */
zfs_locked_range_t *src_zlr;
zfs_locked_range_t *dst_zlr;

/*
* Maintain predictable lock order.
*/
if (src_file < dst_file ||
(src_file == dst_file && src_off < dst_off)) {

src_zlr = zfs_rangelock_enter(
&ITOZ(file_inode(src_file))->z_rangelock, src_off,
len, RL_READER);
dst_zlr = zfs_rangelock_enter(
&ITOZ(file_inode(dst_file))->z_rangelock, dst_off,
len, RL_WRITER);

} else {
dst_zlr = zfs_rangelock_enter(
&ITOZ(file_inode(dst_file))->z_rangelock, dst_off,
len, RL_WRITER);
src_zlr = zfs_rangelock_enter(
&ITOZ(file_inode(src_file))->z_rangelock, src_off,
len, RL_READER);
}

bool same = false;
int ret = zpl_dedupe_file_compare_locked(src_file, src_off,
dst_file, dst_off, len, &same, src_zlr, dst_zlr);
if (ret) {
goto cleanup;
}
if (!same) {
ret = -EBADE;
goto cleanup;
}
ret = __zpl_clone_file_range_locked(src_file, src_off, dst_file,
dst_off, len, src_zlr, dst_zlr);
cleanup:
zfs_rangelock_exit(src_zlr);
zfs_rangelock_exit(dst_zlr);
return (ret);
}
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;

return (__zpl_clone_file_range(src_file, src_off,
dst_file, dst_off, len));
return (
__zpl_clone_file_range(src_file, src_off, dst_file, dst_off, len));
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */

#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
 *
 * A len of zero is a request to clone from src_off through the end of the
 * source file. The actual work is done by __zpl_clone_file_range(), whose
 * result is returned as-is.
 */
int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	uint64_t nbytes = len;

	if (nbytes == 0) {
		/* Everything from src_off up to the current source size */
		nbytes = i_size_read(file_inode(src_file)) - src_off;
	}

	return (__zpl_clone_file_range(src_file, src_off, dst_file, dst_off,
	    nbytes));
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */

Expand All @@ -202,9 +288,10 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
if (src_file == NULL)
return (-EBADF);

if (dst_file->f_op != src_file->f_op)
if (dst_file->f_op != src_file->f_op) {
fput(src_file);
return (-EXDEV);

}
size_t len = i_size_read(file_inode(src_file));

ssize_t ret =
Expand Down Expand Up @@ -237,8 +324,10 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
if (src_file == NULL)
return (-EBADF);

if (dst_file->f_op != src_file->f_op)
if (dst_file->f_op != src_file->f_op) {
fput(src_file);
return (-EXDEV);
}

size_t len = fcr.fcr_src_length;
if (len == 0)
Expand All @@ -263,10 +352,140 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)

/* Entry point for FIDEDUPERANGE, before Linux 4.5. */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
zpl_ioctl_fideduperange(struct file *src_file, void *arg)
{
(void) arg;
zfs_ioc_compat_dedupe_range_t dup;
int i;
if (copy_from_user(&dup, arg, sizeof (dup)))
return (-EFAULT);

/* No support for dedup yet */
return (-ENOTTY);
u16 count = dup.fdr_dest_count;
struct inode *src_inode = file_inode(src_file);

/* Nothing to duplicate to */
if (count == 0)
return (-EINVAL);

/* Check the src file */
if (!(src_file->f_mode & FMODE_READ))
return (-EINVAL);

if (S_ISDIR(src_inode->i_mode))
return (-EISDIR);

if (!S_ISREG(src_inode->i_mode))
return (-EINVAL);

if (dup.fdr_src_offset + dup.fdr_src_length > i_size_read(src_inode))
return (-EINVAL);

/* Check the dup structure */
if (dup.fdr_reserved1 || dup.fdr_reserved2)
return (-EINVAL);

/* Set output values to safe results */
for (i = 0; i < count; i++) {
dup.fdr_info[i].fdri_bytes_deduped = 0ULL;
dup.fdr_info[i].fdri_status = FILE_DEDUPE_RANGE_SAME;
}

for (i = 0; i < count; i++) {
struct fd dst_fd = fdget(dup.fdr_info[i].fdri_dest_fd);
struct file *dst_file = dst_fd.file;

if (!dst_file) {
dup.fdr_info[i].fdri_status = -EBADF;
continue;
}
if (dup.fdr_info[i].fdri_reserved) {
dup.fdr_info[i].fdri_status = -EINVAL;
goto do_fdput;
}
loff_t deduped =
zpl_remap_file_range(src_file, dup.fdr_src_offset, dst_file,
dup.fdr_info[i].fdri_dest_offset, dup.fdr_src_length,
REMAP_FILE_DEDUP);
if (deduped == -EBADE) {
dup.fdr_info[i].fdri_status = FILE_DEDUPE_RANGE_DIFFERS;
} else if (deduped < 0) {
dup.fdr_info[i].fdri_status = deduped;
} else {
dup.fdr_info[i].fdri_bytes_deduped = dup.fdr_src_length;
}
do_fdput:
fdput(dst_fd);
}
return (0);
}

int
zpl_dedupe_file_compare_locked(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, uint64_t len, bool *is_same,
zfs_locked_range_t *src_zlr, zfs_locked_range_t *dst_zlr)
{
bool same = true;
int err = 0;
znode_t *src_znode = ITOZ(file_inode(src_file));
znode_t *dst_znode = ITOZ(file_inode(dst_file));
fstrans_cookie_t cookie;
cred_t *cr = CRED();
void *src_buf = kmem_zalloc(PAGE_SIZE, KM_SLEEP);
void *dst_buf = kmem_zalloc(PAGE_SIZE, KM_SLEEP);

while (len) {
zfs_uio_t uio;

uint64_t cmp_len = min(PAGE_SIZE - offset_in_page(src_off),
PAGE_SIZE - offset_in_page(dst_off));

cmp_len = min(cmp_len, len);

if (cmp_len == 0)
break;
struct iovec iov;
iov.iov_base = src_buf;
iov.iov_len = cmp_len;

zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, cmp_len, 0);
crhold(cr);
cookie = spl_fstrans_mark();
err = -zfs_read_locked(
src_znode, &uio, src_file->f_flags, cr, src_zlr);
spl_fstrans_unmark(cookie);
crfree(cr);
if (zfs_uio_resid(&uio) != 0)
err = -EIO;
if (err)
goto done;
iov.iov_base = dst_buf;
iov.iov_len = cmp_len;

zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, cmp_len, 0);
crhold(cr);
cookie = spl_fstrans_mark();
err = -zfs_read_locked(
dst_znode, &uio, dst_file->f_flags, cr, dst_zlr);
spl_fstrans_unmark(cookie);
crfree(cr);
if (zfs_uio_resid(&uio) != 0)
err = -EIO;

if (err)
goto done;
if (memcmp(src_buf, dst_buf, cmp_len))
same = false;

if (!same)
break;

src_off += cmp_len;
dst_off += cmp_len;
len -= cmp_len;
}

*is_same = same;
done:
kmem_free(src_buf, PAGE_SIZE);
kmem_free(dst_buf, PAGE_SIZE);
return (err);
}
Loading