diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am
index e3527ffe7058..5dd09d609b57 100644
--- a/lib/libzfs/Makefile.am
+++ b/lib/libzfs/Makefile.am
@@ -43,7 +43,6 @@ if BUILD_LINUX
 USER_C += \
 	os/linux/libzfs_mount_os.c \
 	os/linux/libzfs_pool_os.c \
-	os/linux/libzfs_sendrecv_os.c \
 	os/linux/libzfs_util_os.c
 endif
 
diff --git a/lib/libzfs/libzfs_impl.h b/lib/libzfs/libzfs_impl.h
index ce7373582f0e..50e5969b651c 100644
--- a/lib/libzfs/libzfs_impl.h
+++ b/lib/libzfs/libzfs_impl.h
@@ -256,7 +256,6 @@ extern int libzfs_load_module(void);
 extern int zpool_relabel_disk(libzfs_handle_t *hdl, const char *path,
     const char *msg);
 extern int find_shares_object(differ_info_t *di);
-extern void libzfs_set_pipe_max(int infd);
 extern void zfs_commit_proto(zfs_share_proto_t *);
 
 #ifdef __cplusplus
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 8b732bb222a4..1ec313cee82f 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -5142,13 +5142,6 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
 		return (-2);
 	}
 
-	/*
-	 * It is not uncommon for gigabytes to be processed in zfs receive.
-	 * Speculatively increase the buffer size if supported by the platform.
-	 */
-	if (S_ISFIFO(sb.st_mode))
-		libzfs_set_pipe_max(infd);
-
 	if (props) {
 		err = nvlist_lookup_string(props, "origin", &originsnap);
 		if (err && err != ENOENT)
diff --git a/lib/libzfs/os/freebsd/libzfs_compat.c b/lib/libzfs/os/freebsd/libzfs_compat.c
index 4d7421df8d3b..be7e229fa16a 100644
--- a/lib/libzfs/os/freebsd/libzfs_compat.c
+++ b/lib/libzfs/os/freebsd/libzfs_compat.c
@@ -39,12 +39,6 @@
 #define	ZFS_KMOD	"openzfs"
 #endif
 
-void
-libzfs_set_pipe_max(int infd)
-{
-	/* FreeBSD automatically resizes */
-}
-
 static int
 execvPe(const char *name, const char *path, char * const *argv,
     char * const *envp)
diff --git a/lib/libzfs/os/linux/libzfs_sendrecv_os.c b/lib/libzfs/os/linux/libzfs_sendrecv_os.c
deleted file mode 100644
index 593c38ec62df..000000000000
--- a/lib/libzfs/os/linux/libzfs_sendrecv_os.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-
-#include <libzfs.h>
-
-#include "../../libzfs_impl.h"
-
-#ifndef F_SETPIPE_SZ
-#define	F_SETPIPE_SZ (F_SETLEASE + 7)
-#endif /* F_SETPIPE_SZ */
-
-#ifndef F_GETPIPE_SZ
-#define	F_GETPIPE_SZ (F_GETLEASE + 7)
-#endif /* F_GETPIPE_SZ */
-
-void
-libzfs_set_pipe_max(int infd)
-{
-	FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
-
-	if (procf != NULL) {
-		unsigned long max_psize;
-		long cur_psize;
-		if (fscanf(procf, "%lu", &max_psize) > 0) {
-			cur_psize = fcntl(infd, F_GETPIPE_SZ);
-			if (cur_psize > 0 &&
-			    max_psize > (unsigned long) cur_psize)
-				fcntl(infd, F_SETPIPE_SZ,
-				    max_psize);
-		}
-		fclose(procf);
-	}
-}
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index ce33b2153062..5c51dc1c1115 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -26,6 +26,7 @@
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2021 Rich Ercolani.
  */
 
 /*
@@ -96,6 +97,19 @@ static int g_fd = -1;
 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
 static int g_refcount;
 
+#ifdef __linux__
+#ifndef F_SETPIPE_SZ
+#define	F_SETPIPE_SZ (F_SETLEASE + 7)
+#endif /* F_SETPIPE_SZ */
+
+#ifndef F_GETPIPE_SZ
+#define	F_GETPIPE_SZ (F_GETLEASE + 7)
+#endif /* F_GETPIPE_SZ */
+#endif
+
+static unsigned long lzc_get_pipe_max(void);
+static void lzc_set_pipe_max(int infd);
+
 #ifdef ZFS_DEBUG
 static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
 static zfs_errno_t fail_ioc_err;
@@ -645,6 +659,126 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
 	    resumeoff, NULL));
 }
 
+/*
+ * Return the pipe buffer size, in bytes, to aim for: 64k by default, or on
+ * Linux the value of /proc/sys/fs/pipe-max-size when that file can be read
+ * and parsed.
+ */
+static unsigned long
+lzc_get_pipe_max(void)
+{
+	/* FreeBSD automatically grows to 64k */
+	unsigned long max_psize = 65536;
+#ifdef __linux__
+	FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
+
+	if (procf != NULL) {
+		unsigned long proc_psize;
+
+		/* Only trust the parsed value if the conversion succeeded. */
+		if (fscanf(procf, "%lu", &proc_psize) == 1 && proc_psize > 0)
+			max_psize = proc_psize;
+		(void) fclose(procf);
+	}
+#endif
+	return (max_psize);
+}
+
+/*
+ * Best-effort attempt to grow the pipe behind infd to lzc_get_pipe_max()
+ * bytes.  fcntl() failures are ignored, and nothing at all is done on
+ * platforms other than Linux.
+ */
+static void
+lzc_set_pipe_max(int infd)
+{
+#ifdef __linux__
+	unsigned long max_psize = lzc_get_pipe_max();
+	long cur_psize = fcntl(infd, F_GETPIPE_SZ);
+
+	if (cur_psize > 0 && max_psize > (unsigned long)cur_psize)
+		(void) fcntl(infd, F_SETPIPE_SZ, max_psize);
+#else
+	(void) infd;
+#endif
+}
+
+/* Arguments handed to the do_send_output() thread. */
+struct sendargs {
+	int ioctlfd;	/* write end of the pipe, given to the send ioctl */
+	int inputfd;	/* read end of the pipe, drained by the thread */
+	int outputfd;	/* fd the thread copies the stream to */
+};
+typedef struct sendargs sendargs_t;
+
+/*
+ * Thread body: copy everything arriving on args->inputfd to args->outputfd
+ * until EOF or an error, then close args->inputfd.
+ *
+ * Returns 0 on success or an errno value, carried in the void * result.
+ */
+static void *
+do_send_output(void *voidargs)
+{
+	sendargs_t *args = (sendargs_t *)voidargs;
+	sigset_t sigs;
+	size_t buflen = lzc_get_pipe_max();
+	ssize_t nbytes = 1;
+	int err = 0;
+
+	/*
+	 * See the comment above the close() call for why
+	 * we can't just die from SIGPIPE.
+	 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGPIPE);
+	pthread_sigmask(SIG_BLOCK, &sigs, NULL);
+
+#ifdef __linux__
+	while (nbytes > 0) {
+		nbytes = splice(args->inputfd, NULL, args->outputfd, NULL,
+		    buflen, SPLICE_F_MORE);
+	}
+	if (nbytes < 0)
+		err = errno;
+#else
+	char *buf = malloc(buflen);
+
+	if (buf == NULL) {
+		err = ENOMEM;
+		nbytes = 0;
+	}
+	while (nbytes > 0) {
+		ssize_t off = 0;
+
+		nbytes = read(args->inputfd, buf, buflen);
+		if (nbytes < 0)
+			err = errno;
+		/* write() may be short; loop until the chunk is out. */
+		while (off < nbytes) {
+			ssize_t done = write(args->outputfd, buf + off,
+			    nbytes - off);
+
+			if (done <= 0) {
+				err = (done < 0) ? errno : EIO;
+				nbytes = -1;
+				break;
+			}
+			off += done;
+		}
+	}
+	free(buf);
+#endif
+	/*
+	 * If we just return here, the other thread often blocks
+	 * indefinitely on the ioctl completing, which won't happen
+	 * because we stopped consuming the data. So we close the pipe
+	 * here, and the other thread exits in a timely fashion.
+	 */
+	(void) close(args->inputfd);
+	return ((void *)(uintptr_t)err);
+}
+
 /*
  * snapname: The name of the "tosnap", or the snapshot whose contents we are
  * sending.
@@ -664,9 +798,23 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
 {
 	nvlist_t *args;
 	int err;
+	int pipefd[2];
+	pthread_t mythread;
+	sendargs_t sendargs;
+	void *threadret = NULL;
+	int threadstatus;
+
+	/*
+	 * The send ioctl is handed the write end of a pipe; a helper thread
+	 * copies whatever arrives on the read end to the caller's fd.
+	 */
+	if (pipe2(pipefd, O_CLOEXEC) != 0)
+		return (errno);
+
+	lzc_set_pipe_max(pipefd[0]);
 
 	args = fnvlist_alloc();
-	fnvlist_add_int32(args, "fd", fd);
+	fnvlist_add_int32(args, "fd", pipefd[1]);
 	if (from != NULL)
 		fnvlist_add_string(args, "fromsnap", from);
 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
@@ -686,8 +834,44 @@ lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
 	if (redactbook != NULL)
 		fnvlist_add_string(args, "redactbook", redactbook);
 
+	sendargs.inputfd = pipefd[0];
+	sendargs.outputfd = fd;
+	sendargs.ioctlfd = pipefd[1];
+
+	err = pthread_create(&mythread, NULL, do_send_output, &sendargs);
+	if (err != 0) {
+		/* Nobody would drain the pipe, so don't start the send. */
+		(void) close(pipefd[0]);
+		(void) close(pipefd[1]);
+		nvlist_free(args);
+		errno = err;
+		return (err);
+	}
+
 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
+
+	/* Close our copy of the write end so the helper can reach EOF. */
+	(void) close(pipefd[1]);
+
+	/*
+	 * pthread_join() stores a void *, so hand it one rather than the
+	 * address of an int, then recover the thread's errno value.
+	 */
+	(void) pthread_join(mythread, &threadret);
+	threadstatus = (int)(uintptr_t)threadret;
+
 	nvlist_free(args);
+
+	if (threadstatus != 0) {
+		err = threadstatus;
+		/*
+		 * if we don't set errno here, there are some edge cases
+		 * where we wind up dying unexpectedly with
+		 * "internal error: [normal warning msg]: Success"
+		 */
+		errno = threadstatus;
+	}
+
 	return (err);
 }
 
@@ -792,6 +976,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
 	char *atp;
 	int error;
 	boolean_t payload = B_FALSE;
+	struct stat sb;
 
 	ASSERT3S(g_refcount, >, 0);
 	VERIFY3S(g_fd, !=, -1);
@@ -811,6 +996,21 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
 		*slashp = '\0';
 	}
 
+	/*
+	 * The only way fstat can fail is if we do not have a valid file
+	 * descriptor.
+	 */
+	if (fstat(input_fd, &sb) == -1) {
+		return (-errno);
+	}
+
+	/*
+	 * It is not uncommon for gigabytes to be processed in zfs receive.
+	 * Speculatively increase the buffer size if supported by the platform.
+	 */
+	if (S_ISFIFO(sb.st_mode))
+		lzc_set_pipe_max(input_fd);
+
 	/*
 	 * The begin_record is normally a non-byteswapped BEGIN record.
 	 * For resumable streams it may be set to any non-byteswapped