diff --git a/META b/META index 76ca22cbae00..25cd947d6182 100644 --- a/META +++ b/META @@ -7,4 +7,4 @@ Release-Tags: relext License: CDDL Author: OpenZFS Linux-Maximum: 6.10 -Linux-Minimum: 3.10 +Linux-Minimum: 4.18 diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 349c208c521b..aa7da68aa683 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -522,7 +522,7 @@ get_usage(zpool_help_t idx) return (gettext("\tstatus [--power] [-j [--json-int, " "--json-flat-vdevs, ...\n" "\t --json-pool-key-guid]] [-c [script1,script2,...]] " - "[-DegiLpPstvx] ...\n" + "[-dDegiLpPstvx] ...\n" "\t [-T d|u] [pool] [interval [count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" @@ -2602,6 +2602,7 @@ typedef struct status_cbdata { boolean_t cb_print_unhealthy; boolean_t cb_print_status; boolean_t cb_print_slow_ios; + boolean_t cb_print_dio_verify; boolean_t cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; @@ -2879,7 +2880,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, uint_t c, i, vsc, children; pool_scan_stat_t *ps = NULL; vdev_stat_t *vs; - char rbuf[6], wbuf[6], cbuf[6]; + char rbuf[6], wbuf[6], cbuf[6], dbuf[6]; char *vname; uint64_t notpresent; spare_cbdata_t spare_cb; @@ -2997,6 +2998,17 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, printf(" %5s", "-"); } } + if (VDEV_STAT_VALID(vs_dio_verify_errors, vsc) && + cb->cb_print_dio_verify) { + zfs_nicenum(vs->vs_dio_verify_errors, dbuf, + sizeof (dbuf)); + + if (cb->cb_literal) + printf(" %5llu", + (u_longlong_t)vs->vs_dio_verify_errors); + else + printf(" %5s", dbuf); + } } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, @@ -10873,6 +10885,10 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_BOLD, " %5s", gettext("POWER")); } + if (cbp->cb_print_dio_verify) { + printf_color(ANSI_BOLD, " %5s", gettext("DIO")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 
0); @@ -10921,10 +10937,11 @@ status_callback(zpool_handle_t *zhp, void *data) } /* - * zpool status [-c [script1,script2,...]] [-DegiLpPstvx] [--power] [-T d|u] ... - * [pool] [interval [count]] + * zpool status [-c [script1,script2,...]] [-dDegiLpPstvx] [--power] ... + * [-T d|u] [pool] [interval [count]] * * -c CMD For each vdev, run command CMD + * -d Display Direct I/O write verify errors * -D Display dedup status (undocumented) * -e Display only unhealthy vdevs * -g Display guid for individual vdev name. @@ -10967,7 +10984,7 @@ zpool_do_status(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, "c:jDegiLpPstT:vx", long_options, + while ((c = getopt_long(argc, argv, "c:jdDegiLpPstT:vx", long_options, NULL)) != -1) { switch (c) { case 'c': @@ -10994,6 +11011,9 @@ zpool_do_status(int argc, char **argv) } cmd = optarg; break; + case 'd': + cb.cb_print_dio_verify = B_TRUE; + break; case 'D': if (++cb.cb_dedup_stats > 2) cb.cb_dedup_stats = 2; diff --git a/cmd/ztest.c b/cmd/ztest.c index ce031632e758..8ad576627635 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -2262,6 +2262,13 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) if (ztest_random(4) != 0) { int prefetch = ztest_random(2) ? DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. + */ + if (ztest_random(4) == 0) + prefetch |= DMU_DIRECTIO; + ztest_block_tag_t rbt; VERIFY(dmu_read(os, lr->lr_foid, offset, @@ -2813,6 +2820,13 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) enum ztest_io_type io_type; uint64_t blocksize; void *data; + uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. 
+ */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; VERIFY0(dmu_object_info(zd->zd_os, object, &doi)); blocksize = doi.doi_data_block_size; @@ -2878,7 +2892,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) (void) pthread_rwlock_unlock(&ztest_name_lock); VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, - DMU_READ_NO_PREFETCH)); + dmu_read_flags)); (void) ztest_write(zd, object, offset, blocksize, data); break; @@ -5045,6 +5059,13 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) uint64_t stride = 123456789ULL; uint64_t width = 40; int free_percent = 5; + uint32_t dmu_read_flags = DMU_READ_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. + */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; /* * This test uses two objects, packobj and bigobj, that are always @@ -5123,10 +5144,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) * Read the current contents of our objects. */ error = dmu_read(os, packobj, packoff, packsize, packbuf, - DMU_READ_PREFETCH); + dmu_read_flags); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, - DMU_READ_PREFETCH); + dmu_read_flags); ASSERT0(error); /* @@ -5244,9 +5265,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); + packsize, packcheck, dmu_read_flags)); VERIFY0(dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); + bigsize, bigcheck, dmu_read_flags)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); @@ -5336,6 +5357,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) dmu_buf_t *bonus_db; arc_buf_t **bigbuf_arcbufs; dmu_object_info_t doi; + uint32_t dmu_read_flags = DMU_READ_PREFETCH; + + /* + * We will randomly set when to do O_DIRECT on a read. 
+ */ + if (ztest_random(4) == 0) + dmu_read_flags |= DMU_DIRECTIO; size = sizeof (ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); @@ -5466,10 +5494,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, - packsize, packbuf, DMU_READ_PREFETCH); + packsize, packbuf, dmu_read_flags); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, - bigbuf, DMU_READ_PREFETCH); + bigbuf, dmu_read_flags); ASSERT0(error); } compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, @@ -5529,9 +5557,9 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, - packsize, packcheck, DMU_READ_PREFETCH)); + packsize, packcheck, dmu_read_flags)); VERIFY0(dmu_read(os, bigobj, bigoff, - bigsize, bigcheck, DMU_READ_PREFETCH)); + bigsize, bigcheck, dmu_read_flags)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); diff --git a/config/intlmacosx.m4 b/config/intlmacosx.m4 deleted file mode 100644 index 30e6f50e0ac6..000000000000 --- a/config/intlmacosx.m4 +++ /dev/null @@ -1,72 +0,0 @@ -# intlmacosx.m4 serial 6 (gettext-0.20) -dnl Copyright (C) 2004-2014, 2016, 2019 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. -dnl -dnl This file can be used in projects which are not available under -dnl the GNU General Public License or the GNU Library General Public -dnl License but which still want to provide support for the GNU gettext -dnl functionality. -dnl Please note that the actual code of the GNU gettext library is covered -dnl by the GNU Library General Public License, and the rest of the GNU -dnl gettext package is covered by the GNU General Public License. 
-dnl They are *not* in the public domain. - -dnl Checks for special options needed on Mac OS X. -dnl Defines INTL_MACOSX_LIBS. -AC_DEFUN([gt_INTL_MACOSX], -[ - dnl Check for API introduced in Mac OS X 10.4. - AC_CACHE_CHECK([for CFPreferencesCopyAppValue], - [gt_cv_func_CFPreferencesCopyAppValue], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFPreferencesCopyAppValue(NULL, NULL)]])], - [gt_cv_func_CFPreferencesCopyAppValue=yes], - [gt_cv_func_CFPreferencesCopyAppValue=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFPreferencesCopyAppValue = yes; then - AC_DEFINE([HAVE_CFPREFERENCESCOPYAPPVALUE], [1], - [Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in the CoreFoundation framework.]) - fi - dnl Check for API introduced in Mac OS X 10.5. - AC_CACHE_CHECK([for CFLocaleCopyCurrent], [gt_cv_func_CFLocaleCopyCurrent], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFLocaleCopyCurrent();]])], - [gt_cv_func_CFLocaleCopyCurrent=yes], - [gt_cv_func_CFLocaleCopyCurrent=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFLocaleCopyCurrent = yes; then - AC_DEFINE([HAVE_CFLOCALECOPYCURRENT], [1], - [Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the CoreFoundation framework.]) - fi - AC_CACHE_CHECK([for CFLocaleCopyPreferredLanguages], [gt_cv_func_CFLocaleCopyPreferredLanguages], - [gt_save_LIBS="$LIBS" - LIBS="$LIBS -Wl,-framework -Wl,CoreFoundation" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[#include ]], - [[CFLocaleCopyPreferredLanguages();]])], - [gt_cv_func_CFLocaleCopyPreferredLanguages=yes], - [gt_cv_func_CFLocaleCopyPreferredLanguages=no]) - LIBS="$gt_save_LIBS"]) - if test $gt_cv_func_CFLocaleCopyPreferredLanguages = yes; then - AC_DEFINE([HAVE_CFLOCALECOPYPREFERREDLANGUAGES], [1], - [Define to 1 if you have the Mac OS X function 
CFLocaleCopyPreferredLanguages in the CoreFoundation framework.]) - fi - INTL_MACOSX_LIBS= - if test $gt_cv_func_CFPreferencesCopyAppValue = yes \ - || test $gt_cv_func_CFLocaleCopyCurrent = yes \ - || test $gt_cv_func_CFLocaleCopyPreferredLanguages = yes; then - INTL_MACOSX_LIBS="-Wl,-framework -Wl,CoreFoundation" - fi - AC_SUBST([INTL_MACOSX_LIBS]) -]) diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4 index 3ae5dc6b6dbc..3dbd97948189 100644 --- a/config/kernel-acl.m4 +++ b/config/kernel-acl.m4 @@ -1,112 +1,3 @@ -dnl # -dnl # Check if posix_acl_release can be used from a ZFS_META_LICENSED -dnl # module. The is_owner_or_cap macro was replaced by -dnl # inode_owner_or_capable -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE], [ - ZFS_LINUX_TEST_SRC([posix_acl_release], [ - #include - #include - #include - ], [ - struct posix_acl *tmp = posix_acl_alloc(1, 0); - posix_acl_release(tmp); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [ - AC_MSG_CHECKING([whether posix_acl_release() is available]) - ZFS_LINUX_TEST_RESULT([posix_acl_release], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_RELEASE, 1, - [posix_acl_release() is available]) - - AC_MSG_CHECKING([whether posix_acl_release() is GPL-only]) - ZFS_LINUX_TEST_RESULT([posix_acl_release_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_RELEASE_GPL_ONLY, 1, - [posix_acl_release() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.14 API change, -dnl # set_cached_acl() and forget_cached_acl() changed from inline to -dnl # EXPORT_SYMBOL. In the former case, they may not be usable because of -dnl # posix_acl_release. In the latter case, we can always use them. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE], [ - ZFS_LINUX_TEST_SRC([set_cached_acl], [ - #include - #include - #include - ], [ - struct inode *ip = NULL; - struct posix_acl *acl = posix_acl_alloc(1, 0); - set_cached_acl(ip, ACL_TYPE_ACCESS, acl); - forget_cached_acl(ip, ACL_TYPE_ACCESS); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [ - AC_MSG_CHECKING([whether set_cached_acl() is usable]) - ZFS_LINUX_TEST_RESULT([set_cached_acl_license], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_CACHED_ACL_USABLE, 1, - [set_cached_acl() is usable]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.1 API change, -dnl # posix_acl_chmod() was added as the preferred interface. -dnl # -dnl # 3.14 API change, -dnl # posix_acl_chmod() was changed to __posix_acl_chmod() -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD], [ - ZFS_LINUX_TEST_SRC([posix_acl_chmod], [ - #include - #include - ],[ - posix_acl_chmod(NULL, 0, 0) - ]) - - ZFS_LINUX_TEST_SRC([__posix_acl_chmod], [ - #include - #include - ],[ - __posix_acl_chmod(NULL, 0, 0) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_CHMOD], [ - AC_MSG_CHECKING([whether __posix_acl_chmod exists]) - ZFS_LINUX_TEST_RESULT([__posix_acl_chmod], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE___POSIX_ACL_CHMOD, 1, - [__posix_acl_chmod() exists]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether posix_acl_chmod exists]) - ZFS_LINUX_TEST_RESULT([posix_acl_chmod], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_CHMOD, 1, - [posix_acl_chmod() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([posix_acl_chmod()]) - ]) - ]) -]) - dnl # dnl # 3.1 API change, dnl # posix_acl_equiv_mode now wants an umode_t instead of a mode_t @@ -130,34 +21,6 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T], [ ]) ]) -dnl # -dnl # 4.8 API change, -dnl # The function posix_acl_valid now must be passed a namespace. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS], [ - ZFS_LINUX_TEST_SRC([posix_acl_valid_with_ns], [ - #include - #include - ],[ - struct user_namespace *user_ns = NULL; - const struct posix_acl *acl = NULL; - int error; - - error = posix_acl_valid(user_ns, acl); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS], [ - AC_MSG_CHECKING([whether posix_acl_valid() wants user namespace]) - ZFS_LINUX_TEST_RESULT([posix_acl_valid_with_ns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_VALID_WITH_NS, 1, - [posix_acl_valid() wants user namespace]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 3.1 API change, dnl # Check if inode_operations contains the function get_acl @@ -226,9 +89,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL], [ ]) ]) -dnl # -dnl # 3.14 API change, -dnl # Check if inode_operations contains the function set_acl dnl # dnl # 5.12 API change, dnl # set_acl() added a user_namespace* parameter first @@ -290,106 +150,35 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [ ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [ - AC_MSG_CHECKING([whether iops->set_acl() exists]) + AC_MSG_CHECKING([whether iops->set_acl() with 4 args exists]) ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1, [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists]) AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1, [iops->set_acl() takes 4 args, arg2 is struct dentry *]) ],[ - ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [ - AC_MSG_RESULT(yes) - 
AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args]) - ],[ - ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14]) - ]) + AC_MSG_RESULT(no) ]) ]) ]) ]) -dnl # -dnl # 4.7 API change, -dnl # The kernel get_acl will now check cache before calling i_op->get_acl and -dnl # do set_cached_acl after that, so i_op->get_acl don't need to do that -dnl # anymore. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE], [ - ZFS_LINUX_TEST_SRC([get_acl_handle_cache], [ - #include - ],[ - void *sentinel __attribute__ ((unused)) = - uncached_acl_sentinel(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE], [ - AC_MSG_CHECKING([whether uncached_acl_sentinel() exists]) - ZFS_LINUX_TEST_RESULT([get_acl_handle_cache], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_GET_ACL_HANDLE_CACHE, 1, - [uncached_acl_sentinel() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.16 kernel: check if struct posix_acl acl.a_refcount is a refcount_t. -dnl # It's an atomic_t on older kernels. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT], [ - ZFS_LINUX_TEST_SRC([acl_refcount], [ - #include - #include - #include - ],[ - struct posix_acl acl; - refcount_t *r __attribute__ ((unused)) = &acl.a_refcount; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_ACL_HAS_REFCOUNT], [ - AC_MSG_CHECKING([whether posix_acl has refcount_t]) - ZFS_LINUX_TEST_RESULT([acl_refcount], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ACL_REFCOUNT, 1, [posix_acl has refcount_t]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_ACL], [ - ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_SRC_POSIX_ACL_CHMOD ZFS_AC_KERNEL_SRC_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - ZFS_AC_KERNEL_SRC_POSIX_ACL_VALID_WITH_NS ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_GET_ACL ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_SRC_GET_ACL_HANDLE_CACHE - ZFS_AC_KERNEL_SRC_ACL_HAS_REFCOUNT ]) AC_DEFUN([ZFS_AC_KERNEL_ACL], [ - ZFS_AC_KERNEL_POSIX_ACL_RELEASE - ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE - ZFS_AC_KERNEL_POSIX_ACL_CHMOD ZFS_AC_KERNEL_POSIX_ACL_EQUIV_MODE_WANTS_UMODE_T - ZFS_AC_KERNEL_POSIX_ACL_VALID_WITH_NS ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL - ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE - ZFS_AC_KERNEL_ACL_HAS_REFCOUNT ]) diff --git a/config/kernel-aio-fsync.m4 b/config/kernel-aio-fsync.m4 deleted file mode 100644 index b4dbf29ba781..000000000000 --- a/config/kernel-aio-fsync.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # Linux 4.9-rc5+ ABI, removal of the .aio_fsync field -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_AIO_FSYNC], [ - ZFS_LINUX_TEST_SRC([aio_fsync], [ - #include - - static const struct file_operations - fops __attribute__ ((unused)) = { - .aio_fsync = NULL, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_AIO_FSYNC], [ - AC_MSG_CHECKING([whether fops->aio_fsync() exists]) - ZFS_LINUX_TEST_RESULT([aio_fsync], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_AIO_FSYNC, 1, [fops->aio_fsync() exists]) - ],[ - AC_MSG_RESULT(no) - 
]) -]) diff --git a/config/kernel-bdi.m4 b/config/kernel-bdi.m4 deleted file mode 100644 index 9758863a9cbf..000000000000 --- a/config/kernel-bdi.m4 +++ /dev/null @@ -1,81 +0,0 @@ -dnl # -dnl # Check available BDI interfaces. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BDI], [ - ZFS_LINUX_TEST_SRC([super_setup_bdi_name], [ - #include - struct super_block sb; - ], [ - char *name = "bdi"; - atomic_long_t zfs_bdi_seq; - int error __attribute__((unused)); - atomic_long_set(&zfs_bdi_seq, 0); - error = - super_setup_bdi_name(&sb, "%.28s-%ld", name, - atomic_long_inc_return(&zfs_bdi_seq)); - ]) - - ZFS_LINUX_TEST_SRC([bdi_setup_and_register], [ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name); - ]) - - ZFS_LINUX_TEST_SRC([bdi_setup_and_register_3args], [ - #include - struct backing_dev_info bdi; - ], [ - char *name = "bdi"; - unsigned int cap = BDI_CAP_MAP_COPY; - int error __attribute__((unused)) = - bdi_setup_and_register(&bdi, name, cap); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BDI], [ - dnl # - dnl # 4.12, super_setup_bdi_name() introduced. - dnl # - AC_MSG_CHECKING([whether super_setup_bdi_name() exists]) - ZFS_LINUX_TEST_RESULT_SYMBOL([super_setup_bdi_name], - [super_setup_bdi_name], [fs/super.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SUPER_SETUP_BDI_NAME, 1, - [super_setup_bdi_name() exits]) - ], [ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. - dnl # - AC_MSG_CHECKING( - [whether bdi_setup_and_register() wants 2 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([bdi_setup_and_register], - [bdi_setup_and_register], [mm/backing-dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_BDI_SETUP_AND_REGISTER, 1, - [bdi_setup_and_register() wants 2 args]) - ], [ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.34 - 3.19, bdi_setup_and_register() - dnl # takes 3 arguments. 
- dnl # - AC_MSG_CHECKING( - [whether bdi_setup_and_register() wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL( - [bdi_setup_and_register_3args], - [bdi_setup_and_register], [mm/backing-dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARGS_BDI_SETUP_AND_REGISTER, 1, - [bdi_setup_and_register() wants 3 args]) - ], [ - ZFS_LINUX_TEST_ERROR([bdi_setup]) - ]) - ]) - ]) -]) diff --git a/config/kernel-bio.m4 b/config/kernel-bio.m4 index b22c1a3de7e1..8afc9c59ddad 100644 --- a/config/kernel-bio.m4 +++ b/config/kernel-bio.m4 @@ -1,81 +1,3 @@ -dnl # -dnl # 2.6.36 API change, -dnl # REQ_FAILFAST_{DEV|TRANSPORT|DRIVER} -dnl # REQ_DISCARD -dnl # REQ_FLUSH -dnl # -dnl # 4.8 - 4.9 API, -dnl # REQ_FLUSH was renamed to REQ_PREFLUSH -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_REQ], [ - ZFS_LINUX_TEST_SRC([req_failfast_mask], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_FAILFAST_MASK; - ]) - - ZFS_LINUX_TEST_SRC([req_discard], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_DISCARD; - ]) - - ZFS_LINUX_TEST_SRC([req_flush], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_FLUSH; - ]) - - ZFS_LINUX_TEST_SRC([req_preflush], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = REQ_PREFLUSH; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK], [ - AC_MSG_CHECKING([whether REQ_FAILFAST_MASK is defined]) - ZFS_LINUX_TEST_RESULT([req_failfast_mask], [ - AC_MSG_RESULT(yes) - ],[ - ZFS_LINUX_TEST_ERROR([REQ_FAILFAST_MASK]) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_DISCARD], [ - AC_MSG_CHECKING([whether REQ_DISCARD is defined]) - ZFS_LINUX_TEST_RESULT([req_discard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_DISCARD, 1, [REQ_DISCARD is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_FLUSH], [ - AC_MSG_CHECKING([whether REQ_FLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_FLUSH, 1, [REQ_FLUSH is defined]) - ],[ - 
AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_PREFLUSH], [ - AC_MSG_CHECKING([whether REQ_PREFLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_preflush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_PREFLUSH, 1, [REQ_PREFLUSH is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 4.8 API, dnl # @@ -84,31 +6,6 @@ dnl # checking the bio->bi_rw flags. The following checks are used to dnl # detect if a specific operation is supported. dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ - ZFS_LINUX_TEST_SRC([req_op_discard], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_DISCARD; - ]) - - ZFS_LINUX_TEST_SRC([req_op_secure_erase], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; - ]) - - ZFS_LINUX_TEST_SRC([req_op_flush], [ - #include - ],[ - int op __attribute__ ((unused)) = REQ_OP_FLUSH; - ]) - - ZFS_LINUX_TEST_SRC([bio_bi_opf], [ - #include - ],[ - struct bio bio __attribute__ ((unused)); - bio.bi_opf = 0; - ]) - ZFS_LINUX_TEST_SRC([bio_set_op_attrs], [ #include ],[ @@ -117,47 +14,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_OPS], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD], [ - AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) - ZFS_LINUX_TEST_RESULT([req_op_discard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, [REQ_OP_DISCARD is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE], [ - AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) - ZFS_LINUX_TEST_RESULT([req_op_secure_erase], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, - [REQ_OP_SECURE_ERASE is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH], [ - AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) - ZFS_LINUX_TEST_RESULT([req_op_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, [REQ_OP_FLUSH is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ - AC_MSG_CHECKING([whether 
bio->bi_opf is defined]) - ZFS_LINUX_TEST_RESULT([bio_bi_opf], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_OP_ATTRS], [ AC_MSG_CHECKING([whether bio_set_op_attrs is available]) ZFS_LINUX_TEST_RESULT([bio_set_op_attrs], [ @@ -210,127 +66,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO], [ ]) AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [ - AC_MSG_CHECKING([whether bio_set_dev() is available]) - ZFS_LINUX_TEST_RESULT([bio_set_dev], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() is available]) - - AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) - ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, - [bio_set_dev() GPL-only]) - ]) - - AC_MSG_CHECKING([whether bio_set_dev() is a macro]) - ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_SET_DEV_MACRO, 1, - [bio_set_dev() is a macro]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ + AC_MSG_CHECKING([whether bio_set_dev() is GPL-only]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_license], [ AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.3 API change -dnl # Error argument dropped from bio_endio in favor of newly introduced -dnl # bio->bi_error. This also replaces bio->bi_flags value BIO_UPTODATE. -dnl # Introduced by torvalds/linux@4246a0b63bd8f56a1469b12eafeb875b1041a451 -dnl # ("block: add a bi_error field to struct bio"). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS], [ - ZFS_LINUX_TEST_SRC([bio_end_io_t_args], [ - #include - static void wanted_end_io(struct bio *bio) { return; } - bio_end_io_t *end_io __attribute__ ((unused)) = wanted_end_io; - ], []) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ - AC_MSG_CHECKING([whether bio_end_io_t wants 1 arg]) - ZFS_LINUX_TEST_RESULT([bio_end_io_t_args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_BIO_END_IO_T, 1, - [bio_end_io_t wants 1 arg]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change -dnl # The bio->bi_error field was replaced with bio->bi_status which is an -dnl # enum which describes all possible error types. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BI_STATUS], [ - ZFS_LINUX_TEST_SRC([bio_bi_status], [ - #include - ], [ - struct bio bio __attribute__ ((unused)); - blk_status_t status __attribute__ ((unused)) = BLK_STS_OK; - bio.bi_status = status; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_STATUS], [ - AC_MSG_CHECKING([whether bio->bi_status exists]) - ZFS_LINUX_TEST_RESULT([bio_bi_status], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BI_STATUS, 1, [bio->bi_status exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.14 API change, -dnl # Immutable biovecs. A number of fields of struct bio are moved to -dnl # struct bvec_iter. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER], [ - ZFS_LINUX_TEST_SRC([bio_bvec_iter], [ - #include ],[ - struct bio bio; - bio.bi_iter.bi_sector = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_BVEC_ITER], [ - AC_MSG_CHECKING([whether bio has bi_iter]) - ZFS_LINUX_TEST_RESULT([bio_bvec_iter], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_BVEC_ITER, 1, [bio has bi_iter]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.8 API change -dnl # The rw argument has been removed from submit_bio/submit_bio_wait. -dnl # Callers are now expected to set bio->bi_rw instead of passing it in. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO], [ - ZFS_LINUX_TEST_SRC([submit_bio], [ - #include - ],[ - struct bio *bio = NULL; - (void) submit_bio(bio); + AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1, + [bio_set_dev() GPL-only]) ]) -]) -AC_DEFUN([ZFS_AC_KERNEL_BIO_SUBMIT_BIO], [ - AC_MSG_CHECKING([whether submit_bio() wants 1 arg]) - ZFS_LINUX_TEST_RESULT([submit_bio], [ + AC_MSG_CHECKING([whether bio_set_dev() is a macro]) + ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_1ARG_SUBMIT_BIO, 1, [submit_bio() wants 1 arg]) + AC_DEFINE(HAVE_BIO_SET_DEV_MACRO, 1, + [bio_set_dev() is a macro]) ],[ AC_MSG_RESULT(no) ]) @@ -449,31 +198,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID], [ ]) ]) -dnl # -dnl # Linux 5.16 API -dnl # -dnl # The Linux 5.16 API moved struct blkcg_gq into linux/blk-cgroup.h, which -dnl # has been around since 2015. This test looks for the presence of that -dnl # header, so that it can be conditionally included where it exists, but -dnl # still be backward compatible with kernels that pre-date its introduction. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER], [ - ZFS_LINUX_TEST_SRC([blk_cgroup_header], [ - #include - ], []) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [ - AC_MSG_CHECKING([whether linux/blk-cgroup.h exists]) - ZFS_LINUX_TEST_RESULT([blk_cgroup_header],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LINUX_BLK_CGROUP_HEADER, 1, - [linux/blk-cgroup.h exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # Linux 5.18 API dnl # @@ -510,43 +234,22 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [ - ZFS_AC_KERNEL_SRC_REQ ZFS_AC_KERNEL_SRC_BIO_OPS ZFS_AC_KERNEL_SRC_BIO_SET_DEV - ZFS_AC_KERNEL_SRC_BIO_END_IO_T_ARGS - ZFS_AC_KERNEL_SRC_BIO_BI_STATUS - ZFS_AC_KERNEL_SRC_BIO_BVEC_ITER - ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO ZFS_AC_KERNEL_SRC_BIO_CURRENT_BIO_LIST ZFS_AC_KERNEL_SRC_BLKG_TRYGET ZFS_AC_KERNEL_SRC_BIO_BDEV_DISK ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO - ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG ]) AC_DEFUN([ZFS_AC_KERNEL_BIO], [ - ZFS_AC_KERNEL_BIO_REQ_FAILFAST_MASK - ZFS_AC_KERNEL_BIO_REQ_DISCARD - ZFS_AC_KERNEL_BIO_REQ_FLUSH - ZFS_AC_KERNEL_BIO_REQ_PREFLUSH - - ZFS_AC_KERNEL_BIO_REQ_OP_DISCARD - ZFS_AC_KERNEL_BIO_REQ_OP_SECURE_ERASE - ZFS_AC_KERNEL_BIO_REQ_OP_FLUSH - ZFS_AC_KERNEL_BIO_BI_OPF ZFS_AC_KERNEL_BIO_SET_OP_ATTRS - ZFS_AC_KERNEL_BIO_SET_DEV - ZFS_AC_KERNEL_BIO_END_IO_T_ARGS - ZFS_AC_KERNEL_BIO_BI_STATUS - ZFS_AC_KERNEL_BIO_BVEC_ITER - ZFS_AC_KERNEL_BIO_SUBMIT_BIO ZFS_AC_KERNEL_BIO_CURRENT_BIO_LIST ZFS_AC_KERNEL_BLKG_TRYGET ZFS_AC_KERNEL_BIO_BDEV_DISK ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID - ZFS_AC_KERNEL_BLK_CGROUP_HEADER ZFS_AC_KERNEL_BIO_ALLOC_4ARG ]) diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index a064140f337a..cd2b143e89a0 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -161,7 +161,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ dnl # dnl # 5.19: 
bdev_max_secure_erase_sectors() available dnl # 4.8: blk_queue_secure_erase() available -dnl # 2.6.36: blk_queue_secdiscard() available dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [ @@ -182,16 +181,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [ memset(q, 0, sizeof(r)); value = blk_queue_secure_erase(q); ],[-Wframe-larger-than=8192]) - - ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [ - #include - ],[ - struct request_queue r; - struct request_queue *q = &r; - int value __attribute__ ((unused)); - memset(q, 0, sizeof(r)); - value = blk_queue_secdiscard(q); - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ @@ -209,137 +198,11 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [ AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1, [blk_queue_secure_erase() is available]) ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether blk_queue_secdiscard() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1, - [blk_queue_secdiscard() is available]) - ],[ - ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) - ]) + ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase]) ]) ]) ]) -dnl # -dnl # 4.16 API change, -dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [ - ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [ - #include - #include - ],[ - struct request_queue *q = NULL; - blk_queue_flag_set(0, q); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [ - AC_MSG_CHECKING([whether blk_queue_flag_set() exists]) - ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1, - [blk_queue_flag_set() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [ - ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [ - #include - #include - ],[ - struct request_queue *q = NULL; - blk_queue_flag_clear(0, q); 
- ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [ - AC_MSG_CHECKING([whether blk_queue_flag_clear() exists]) - ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1, - [blk_queue_flag_clear() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 2.6.36 API change, -dnl # Added blk_queue_flush() interface, while the previous interface -dnl # was available to all the new one is GPL-only. Thus in addition to -dnl # detecting if this function is available we determine if it is -dnl # GPL-only. If the GPL-only interface is there we implement our own -dnl # compatibility function, otherwise we use the function. The hope -dnl # is that long term this function will be opened up. -dnl # -dnl # 4.7 API change, -dnl # Replace blk_queue_flush with blk_queue_write_cache -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [ - ZFS_LINUX_TEST_SRC([blk_queue_flush], [ - #include - ], [ - struct request_queue *q __attribute__ ((unused)) = NULL; - (void) blk_queue_flush(q, REQ_FLUSH); - ], [], [ZFS_META_LICENSE]) - - ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [ - #include - #include - ], [ - struct request_queue *q __attribute__ ((unused)) = NULL; - blk_queue_write_cache(q, true, true); - ], [], [ZFS_META_LICENSE]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [ - AC_MSG_CHECKING([whether blk_queue_flush() is available]) - ZFS_LINUX_TEST_RESULT([blk_queue_flush], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1, - [blk_queue_flush() is available]) - - AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only]) - ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1, - [blk_queue_flush() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) - - dnl # - dnl # 4.7 API change - dnl # Replace blk_queue_flush with blk_queue_write_cache - dnl # - AC_MSG_CHECKING([whether blk_queue_write_cache() exists]) - 
ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1, - [blk_queue_write_cache() exists]) - - AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only]) - ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1, - [blk_queue_write_cache() is GPL-only]) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.34 API change dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors(). @@ -385,24 +248,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ ]) ]) -dnl # -dnl # See if kernel supports block multi-queue and blk_status_t. -dnl # blk_status_t represents the new status codes introduced in the 4.13 -dnl # kernel patch: -dnl # -dnl # block: introduce new block status code type -dnl # -dnl # We do not currently support the "old" block multi-queue interfaces from -dnl # prior kernels. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ - ZFS_LINUX_TEST_SRC([blk_mq], [ - #include - ], [ - struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0}; - (void) blk_mq_alloc_tag_set(&tag_set); - return BLK_STS_OK; - ], []) +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ_RQ_HCTX], [ ZFS_LINUX_TEST_SRC([blk_mq_rq_hctx], [ #include #include @@ -413,18 +259,11 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ ], []) ]) -AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [ - AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available]) - ZFS_LINUX_TEST_RESULT([blk_mq], [ +AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ_RQ_HCTX], [ + AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request]) + ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available]) - AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request]) - ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue 
hardware context is cached in struct request]) - ], [ - AC_MSG_RESULT(no) - ]) + AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue hardware context is cached in struct request]) ], [ AC_MSG_RESULT(no) ]) @@ -437,12 +276,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_SRC_BLK_MQ + ZFS_AC_KERNEL_SRC_BLK_MQ_RQ_HCTX ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ @@ -452,10 +288,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD ZFS_AC_KERNEL_BLK_QUEUE_DISCARD ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET - ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR - ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_BLK_MQ + ZFS_AC_KERNEL_BLK_MQ_RQ_HCTX ]) diff --git a/config/kernel-blkdev.m4 b/config/kernel-blkdev.m4 index 4f60f96acb56..83190c6fbe3f 100644 --- a/config/kernel-blkdev.m4 +++ b/config/kernel-blkdev.m4 @@ -396,7 +396,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_INVALIDATE_BDEV], [ dnl # dnl # 5.11 API, lookup_bdev() takes dev_t argument. dnl # 2.6.27 API, lookup_bdev() was first exported. -dnl # 4.4.0-6.21 API, lookup_bdev() on Ubuntu takes mode argument. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [ ZFS_LINUX_TEST_SRC([lookup_bdev_devt], [ @@ -418,15 +417,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [ bdev = lookup_bdev(path); ]) - - ZFS_LINUX_TEST_SRC([lookup_bdev_mode], [ - #include - ], [ - struct block_device *bdev __attribute__ ((unused)); - const char path[] = "/example/path"; - - bdev = lookup_bdev(path, FMODE_READ); - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [ @@ -446,17 +436,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [ AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1, [lookup_bdev() wants 1 arg]) ], [ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether lookup_bdev() wants mode arg]) - ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_mode], - [lookup_bdev], [fs/block_dev.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MODE_LOOKUP_BDEV, 1, - [lookup_bdev() wants mode arg]) - ], [ - ZFS_LINUX_TEST_ERROR([lookup_bdev()]) - ]) + ZFS_LINUX_TEST_ERROR([lookup_bdev()]) ]) ]) ]) diff --git a/config/kernel-clear-inode.m4 b/config/kernel-clear-inode.m4 deleted file mode 100644 index 3f454d7ec0d3..000000000000 --- a/config/kernel-clear-inode.m4 +++ /dev/null @@ -1,39 +0,0 @@ -dnl # -dnl # 3.5.0 API change -dnl # torvalds/linux@dbd5768f87ff6fb0a4fe09c4d7b6c4a24de99430 and -dnl # torvalds/linux@7994e6f7254354e03028a11f98a27bd67dace9f1 reworked -dnl # where inode_sync_wait() is called. -dnl # -dnl # Prior to these changes it would occur in end_writeback() but due -dnl # to various issues (described in the above commits) it has been -dnl # moved to evict(). This changes the ordering is which sync occurs -dnl # but otherwise doesn't impact the zpl implementation. -dnl # -dnl # The major impact here is the renaming of end_writeback() to -dnl # clear_inode(). However, care must be taken when detecting this -dnl # API change because as recently as 2.6.35 there was a clear_inode() -dnl # function. However, it was made obsolete by the evict_inode() API -dnl # change at the same time. 
-dnl # -dnl # Therefore, to ensure we have the correct API we only allow the -dnl # clear_inode() compatibility code to be defined iff the evict_inode() -dnl # functionality is also detected. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CLEAR_INODE], [ - ZFS_LINUX_TEST_SRC([clear_inode], [ - #include - ], [ - clear_inode(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CLEAR_INODE], [ - AC_MSG_CHECKING([whether clear_inode() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([clear_inode], - [clear_inode], [fs/inode.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CLEAR_INODE, 1, [clear_inode() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-config-defined.m4 b/config/kernel-config-defined.m4 index 54837d728341..83c40fa6cd8e 100644 --- a/config/kernel-config-defined.m4 +++ b/config/kernel-config-defined.m4 @@ -4,21 +4,6 @@ dnl # detected at configure time and cause a build failure. Otherwise dnl # modules may be successfully built that behave incorrectly. dnl # AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEFINED], [ - AS_IF([test "x$cross_compiling" != xyes], [ - AC_RUN_IFELSE([ - AC_LANG_PROGRAM([ - #include "$LINUX/include/linux/license.h" - ], [ - return !license_is_gpl_compatible( - "$ZFS_META_LICENSE"); - ]) - ], [ - AC_DEFINE([ZFS_IS_GPL_COMPATIBLE], [1], - [Define to 1 if GPL-only symbols can be used]) - ], [ - ]) - ]) - ZFS_AC_KERNEL_SRC_CONFIG_MODULES ZFS_AC_KERNEL_SRC_CONFIG_BLOCK ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4 deleted file mode 100644 index ab7d9c5cedba..000000000000 --- a/config/kernel-current-time.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 4.9, current_time() added -dnl # 4.18, return type changed from timespec to timespec64 -dnl # -dnl # Note that we don't care about the return type in this check. If we have -dnl # to implement a fallback, we'll know we're <4.9, which was timespec. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CURRENT_TIME], [ - ZFS_LINUX_TEST_SRC([current_time], [ - #include - ], [ - struct inode ip __attribute__ ((unused)); - (void) current_time(&ip); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], [ - AC_MSG_CHECKING([whether current_time() exists]) - ZFS_LINUX_TEST_RESULT_SYMBOL([current_time], - [current_time], [fs/inode.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-dentry-alias.m4 b/config/kernel-dentry-alias.m4 deleted file mode 100644 index f0ddb8d010b0..000000000000 --- a/config/kernel-dentry-alias.m4 +++ /dev/null @@ -1,30 +0,0 @@ -dnl # -dnl # 3.18 API change -dnl # Dentry aliases are in d_u struct dentry member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ - ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ - #include - #include - #include - ], [ - struct inode *inode __attribute__ ((unused)) = NULL; - struct dentry *dentry __attribute__ ((unused)) = NULL; - hlist_for_each_entry(dentry, &inode->i_dentry, - d_u.d_alias) { - d_drop(dentry); - } - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ - AC_MSG_CHECKING([whether dentry aliases are in d_u member]) - ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, - [dentry aliases are in d_u member]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - diff --git a/config/kernel-dentry-operations.m4 b/config/kernel-dentry-operations.m4 index 500f61e26aee..aa5a9f2aff39 100644 --- a/config/kernel-dentry-operations.m4 +++ b/config/kernel-dentry-operations.m4 @@ -1,26 +1,3 @@ -dnl # -dnl # 3.4.0 API change -dnl # Added d_make_root() to replace previous d_alloc_root() function. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_MAKE_ROOT], [ - ZFS_LINUX_TEST_SRC([d_make_root], [ - #include - ], [ - d_make_root(NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_MAKE_ROOT], [ - AC_MSG_CHECKING([whether d_make_root() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_make_root], - [d_make_root], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_MAKE_ROOT, 1, [d_make_root() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.28 API change dnl # Added d_obtain_alias() helper function. @@ -43,31 +20,6 @@ AC_DEFUN([ZFS_AC_KERNEL_D_OBTAIN_ALIAS], [ ]) ]) -dnl # -dnl # 2.6.12 API change -dnl # d_prune_aliases() helper function available. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES], [ - ZFS_LINUX_TEST_SRC([d_prune_aliases], [ - #include - ], [ - struct inode *ip = NULL; - d_prune_aliases(ip); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES], [ - AC_MSG_CHECKING([whether d_prune_aliases() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([d_prune_aliases], - [d_prune_aliases], [fs/dcache.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1, - [d_prune_aliases() is available]) - ], [ - ZFS_LINUX_TEST_ERROR([d_prune_aliases()]) - ]) -]) - dnl # dnl # 2.6.38 API change dnl # Added d_set_d_op() helper function. 
@@ -90,101 +42,14 @@ AC_DEFUN([ZFS_AC_KERNEL_D_SET_D_OP], [ ]) ]) -dnl # -dnl # 3.6 API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA], [ - ZFS_LINUX_TEST_SRC([dentry_operations_revalidate], [ - #include - #include - - static int revalidate (struct dentry *dentry, - struct nameidata *nidata) { return 0; } - - static const struct dentry_operations - dops __attribute__ ((unused)) = { - .d_revalidate = revalidate, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA], [ - AC_MSG_CHECKING([whether dops->d_revalidate() takes struct nameidata]) - ZFS_LINUX_TEST_RESULT([dentry_operations_revalidate], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_D_REVALIDATE_NAMEIDATA, 1, - [dops->d_revalidate() operation takes nameidata]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 2.6.30 API change -dnl # The 'struct dentry_operations' was constified in the dentry structure. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS], [ - ZFS_LINUX_TEST_SRC([dentry_operations_const], [ - #include - - const struct dentry_operations test_d_op = { - .d_revalidate = NULL, - }; - ],[ - struct dentry d __attribute__ ((unused)); - d.d_op = &test_d_op; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS], [ - AC_MSG_CHECKING([whether dentry uses const struct dentry_operations]) - ZFS_LINUX_TEST_RESULT([dentry_operations_const], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CONST_DENTRY_OPERATIONS, 1, - [dentry uses const struct dentry_operations]) - ],[ - ZFS_LINUX_TEST_ERROR([const dentry_operations]) - ]) -]) - -dnl # -dnl # 2.6.38 API change -dnl # Added sb->s_d_op default dentry_operations member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_S_D_OP], [ - ZFS_LINUX_TEST_SRC([super_block_s_d_op], [ - #include - ],[ - struct super_block sb __attribute__ ((unused)); - sb.s_d_op = NULL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_S_D_OP], [ - AC_MSG_CHECKING([whether super_block has s_d_op]) - ZFS_LINUX_TEST_RESULT([super_block_s_d_op], [ - AC_MSG_RESULT(yes) - ], [ - 
ZFS_LINUX_TEST_ERROR([super_block s_d_op]) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY], [ - ZFS_AC_KERNEL_SRC_D_MAKE_ROOT ZFS_AC_KERNEL_SRC_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_SRC_D_PRUNE_ALIASES ZFS_AC_KERNEL_SRC_D_SET_D_OP - ZFS_AC_KERNEL_SRC_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_SRC_CONST_DENTRY_OPERATIONS ZFS_AC_KERNEL_SRC_S_D_OP ]) AC_DEFUN([ZFS_AC_KERNEL_DENTRY], [ - ZFS_AC_KERNEL_D_MAKE_ROOT ZFS_AC_KERNEL_D_OBTAIN_ALIAS - ZFS_AC_KERNEL_D_PRUNE_ALIASES ZFS_AC_KERNEL_D_SET_D_OP - ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA - ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS ZFS_AC_KERNEL_S_D_OP ]) diff --git a/config/kernel-dirty-inode.m4 b/config/kernel-dirty-inode.m4 deleted file mode 100644 index 2ef8658748ca..000000000000 --- a/config/kernel-dirty-inode.m4 +++ /dev/null @@ -1,29 +0,0 @@ -dnl # -dnl # 3.0 API change -dnl # The sops->dirty_inode() callbacks were updated to take a flags -dnl # argument. This allows the greater control over whether the -dnl # filesystem needs to push out a transaction or not. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_DIRTY_INODE], [ - ZFS_LINUX_TEST_SRC([dirty_inode_with_flags], [ - #include - - static void dirty_inode(struct inode *a, int b) { return; } - - static const struct super_operations - sops __attribute__ ((unused)) = { - .dirty_inode = dirty_inode, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_DIRTY_INODE], [ - AC_MSG_CHECKING([whether sops->dirty_inode() wants flags]) - ZFS_LINUX_TEST_RESULT([dirty_inode_with_flags], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_DIRTY_INODE_WITH_FLAGS, 1, - [sops->dirty_inode() wants flags]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-encode-fh-inode.m4 b/config/kernel-encode-fh-inode.m4 deleted file mode 100644 index b3ec040b5e95..000000000000 --- a/config/kernel-encode-fh-inode.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # 3.5.0 API change -dnl # torvalds/linux@b0b0382bb4904965a9e9fca77ad87514dfda0d1c changed the -dnl # ->encode_fh() callback to pass the child inode and its parents inode -dnl # rather than a dentry and a boolean saying whether we want the parent. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE], [ - ZFS_LINUX_TEST_SRC([export_operations_encode_fh], [ - #include - static int encode_fh(struct inode *inode, __u32 *fh, int *max_len, - struct inode *parent) { return 0; } - static struct export_operations eops __attribute__ ((unused))={ - .encode_fh = encode_fh, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE], [ - AC_MSG_CHECKING([whether eops->encode_fh() wants inode]) - ZFS_LINUX_TEST_RESULT([export_operations_encode_fh], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ENCODE_FH_WITH_INODE, 1, - [eops->encode_fh() wants child and parent inodes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-evict-inode.m4 b/config/kernel-evict-inode.m4 deleted file mode 100644 index 87082c9a2839..000000000000 --- a/config/kernel-evict-inode.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.36 API change -dnl # The sops->delete_inode() and sops->clear_inode() callbacks have -dnl # replaced by a single sops->evict_inode() callback. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_EVICT_INODE], [ - ZFS_LINUX_TEST_SRC([evict_inode], [ - #include - static void evict_inode (struct inode * t) { return; } - static struct super_operations sops __attribute__ ((unused)) = { - .evict_inode = evict_inode, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_EVICT_INODE], [ - AC_MSG_CHECKING([whether sops->evict_inode() exists]) - ZFS_LINUX_TEST_RESULT([evict_inode], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_EVICT_INODE, 1, [sops->evict_inode() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([evict_inode]) - ]) -]) diff --git a/config/kernel-fadvise.m4 b/config/kernel-fadvise.m4 deleted file mode 100644 index 08912de16ed8..000000000000 --- a/config/kernel-fadvise.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # Linux 4.19 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FADVISE], [ - ZFS_LINUX_TEST_SRC([file_fadvise], [ - #include - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fadvise = NULL, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FADVISE], [ - AC_MSG_CHECKING([whether fops->fadvise() exists]) - ZFS_LINUX_TEST_RESULT([file_fadvise], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_FADVISE, 1, [fops->fadvise() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-fallocate.m4 b/config/kernel-fallocate.m4 deleted file mode 100644 index 95186dada453..000000000000 --- a/config/kernel-fallocate.m4 +++ /dev/null @@ -1,44 +0,0 @@ -dnl # -dnl # Linux 2.6.38 - 3.x API -dnl # The fallocate callback was moved from the inode_operations -dnl # structure to the file_operations structure. 
-dnl # -dnl # -dnl # Linux 3.15+ -dnl # fallocate learned a new flag, FALLOC_FL_ZERO_RANGE -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [ - ZFS_LINUX_TEST_SRC([file_fallocate], [ - #include - - static long test_fallocate(struct file *file, int mode, - loff_t offset, loff_t len) { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fallocate = test_fallocate, - }; - ], []) - ZFS_LINUX_TEST_SRC([falloc_fl_zero_range], [ - #include - ],[ - int flags __attribute__ ((unused)); - flags = FALLOC_FL_ZERO_RANGE; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [ - AC_MSG_CHECKING([whether fops->fallocate() exists]) - ZFS_LINUX_TEST_RESULT([file_fallocate], [ - AC_MSG_RESULT(yes) - AC_MSG_CHECKING([whether FALLOC_FL_ZERO_RANGE exists]) - ZFS_LINUX_TEST_RESULT([falloc_fl_zero_range], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FALLOC_FL_ZERO_RANGE, 1, [FALLOC_FL_ZERO_RANGE is defined]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ - ZFS_LINUX_TEST_ERROR([file_fallocate]) - ]) -]) diff --git a/config/kernel-file-dentry.m4 b/config/kernel-file-dentry.m4 deleted file mode 100644 index 9cb5869c3821..000000000000 --- a/config/kernel-file-dentry.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.1 API change -dnl # struct access file->f_path.dentry was replaced by accessor function -dnl # since fix torvalds/linux@4bacc9c9234c ("overlayfs: Make f_path always -dnl # point to the overlay and f_inode to the underlay"). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_DENTRY], [ - ZFS_LINUX_TEST_SRC([file_dentry], [ - #include - ],[ - struct file *f = NULL; - file_dentry(f); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [ - AC_MSG_CHECKING([whether file_dentry() is available]) - ZFS_LINUX_TEST_RESULT([file_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_DENTRY, 1, [file_dentry() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-file-inode.m4 b/config/kernel-file-inode.m4 deleted file mode 100644 index 00a3621657ad..000000000000 --- a/config/kernel-file-inode.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 3.19 API change -dnl # struct access f->f_dentry->d_inode was replaced by accessor function -dnl # file_inode(f) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILE_INODE], [ - ZFS_LINUX_TEST_SRC([file_inode], [ - #include - ],[ - struct file *f = NULL; - file_inode(f); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILE_INODE], [ - AC_MSG_CHECKING([whether file_inode() is available]) - ZFS_LINUX_TEST_RESULT([file_inode], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILE_INODE, 1, [file_inode() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-filemap.m4 b/config/kernel-filemap.m4 deleted file mode 100644 index 0b7da828d299..000000000000 --- a/config/kernel-filemap.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # filemap_range_has_page was not available till 4.13 -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [ - ZFS_LINUX_TEST_SRC([filemap_range_has_page], [ - #include - #include - ],[ - struct address_space *mapping = NULL; - loff_t lstart = 0; - loff_t lend = 0; - bool ret __attribute__ ((unused)); - - ret = filemap_range_has_page(mapping, lstart, lend); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [ - AC_MSG_CHECKING([whether filemap_range_has_page() is available]) - ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1, - [filemap_range_has_page() is available]) - ],[ - AC_MSG_RESULT(no) 
- ]) -]) diff --git a/config/kernel-fsync.m4 b/config/kernel-fsync.m4 deleted file mode 100644 index c155f8af81a8..000000000000 --- a/config/kernel-fsync.m4 +++ /dev/null @@ -1,53 +0,0 @@ -dnl # -dnl # Check file_operations->fsync interface. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_FSYNC], [ - ZFS_LINUX_TEST_SRC([fsync_without_dentry], [ - #include - - static int test_fsync(struct file *f, int x) { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fsync = test_fsync, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([fsync_range], [ - #include - - static int test_fsync(struct file *f, loff_t a, loff_t b, int c) - { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .fsync = test_fsync, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_FSYNC], [ - dnl # - dnl # Linux 2.6.35 - Linux 3.0 API - dnl # - AC_MSG_CHECKING([whether fops->fsync() wants no dentry]) - ZFS_LINUX_TEST_RESULT([fsync_without_dentry], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_FSYNC_WITHOUT_DENTRY, 1, - [fops->fsync() without dentry]) - ],[ - AC_MSG_RESULT([no]) - - dnl # - dnl # Linux 3.1 - 3.x API - dnl # - AC_MSG_CHECKING([whether fops->fsync() wants range]) - ZFS_LINUX_TEST_RESULT([fsync_range], [ - AC_MSG_RESULT([range]) - AC_DEFINE(HAVE_FSYNC_RANGE, 1, - [fops->fsync() with range]) - ],[ - ZFS_LINUX_TEST_ERROR([fops->fsync]) - ]) - ]) -]) diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4 index f5323f0dcb9f..d355f9006bd3 100644 --- a/config/kernel-generic_fillattr.m4 +++ b/config/kernel-generic_fillattr.m4 @@ -48,12 +48,16 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [ AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1, [generic_fillattr requires struct mnt_idmap* and u32 request_mask]) ],[ + AC_MSG_RESULT([no]) + AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*]) ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [ AC_MSG_RESULT([yes]) AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1, 
[generic_fillattr requires struct mnt_idmap*]) ],[ + AC_MSG_RESULT([no]) + AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*]) ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [ AC_MSG_RESULT([yes]) diff --git a/config/kernel-generic_io_acct.m4 b/config/kernel-generic_io_acct.m4 index a6a109004294..da92aad058cb 100644 --- a/config/kernel-generic_io_acct.m4 +++ b/config/kernel-generic_io_acct.m4 @@ -49,18 +49,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [ bio_end_io_acct(bio, start_time); ]) - ZFS_LINUX_TEST_SRC([generic_acct_3args], [ - #include - - void (*generic_start_io_acct_f)(int, unsigned long, - struct hd_struct *) = &generic_start_io_acct; - void (*generic_end_io_acct_f)(int, struct hd_struct *, - unsigned long) = &generic_end_io_acct; - ], [ - generic_start_io_acct(0, 0, NULL); - generic_end_io_acct(0, NULL, 0); - ]) - ZFS_LINUX_TEST_SRC([generic_acct_4args], [ #include @@ -138,23 +126,6 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [ [generic_*_io_acct() 4 arg available]) ], [ AC_MSG_RESULT(no) - - dnl # - dnl # 3.19 API addition - dnl # - dnl # torvalds/linux@394ffa50 allows us to increment - dnl # iostat counters without generic_make_request(). - dnl # - AC_MSG_CHECKING( - [whether generic_*_io_acct wants 3 args]) - ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args], - [generic_start_io_acct], [block/bio.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1, - [generic_*_io_acct() 3 arg available]) - ], [ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-generic_readlink.m4 b/config/kernel-generic_readlink.m4 deleted file mode 100644 index a7a33b408abd..000000000000 --- a/config/kernel-generic_readlink.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 4.10 API -dnl # -dnl # NULL inode_operations.readlink implies generic_readlink(), which -dnl # has been made static. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL], [ - ZFS_LINUX_TEST_SRC([generic_readlink_global], [ - #include - ],[ - int i __attribute__ ((unused)); - i = generic_readlink(NULL, NULL, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL], [ - AC_MSG_CHECKING([whether generic_readlink is global]) - ZFS_LINUX_TEST_RESULT([generic_readlink_global], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GENERIC_READLINK, 1, - [generic_readlink is global]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-genhd-flags.m4 b/config/kernel-genhd-flags.m4 index af6a8a086bc9..60cc3173397c 100644 --- a/config/kernel-genhd-flags.m4 +++ b/config/kernel-genhd-flags.m4 @@ -17,12 +17,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENHD_FLAGS], [ ], [ int flags __attribute__ ((unused)) = GENHD_FL_NO_PART; ]) - - ZFS_LINUX_TEST_SRC([genhd_fl_no_part_scan], [ - #include - ], [ - int flags __attribute__ ((unused)) = GENHD_FL_NO_PART_SCAN; - ]) ]) AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [ @@ -30,29 +24,18 @@ AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [ AC_MSG_CHECKING([whether GENHD_FL_EXT_DEVT flag is available]) ZFS_LINUX_TEST_RESULT([genhd_fl_ext_devt], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, GENHD_FL_EXT_DEVT, + AC_DEFINE(HAVE_GENHD_FL_EXT_DEVT, 1, [GENHD_FL_EXT_DEVT flag is available]) ], [ AC_MSG_RESULT(no) - AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, 0, - [GENHD_FL_EXT_DEVT flag is not available]) ]) AC_MSG_CHECKING([whether GENHD_FL_NO_PART flag is available]) ZFS_LINUX_TEST_RESULT([genhd_fl_no_part], [ AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART, + AC_DEFINE(HAVE_GENHD_FL_NO_PART, 1, [GENHD_FL_NO_PART flag is available]) ], [ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether GENHD_FL_NO_PART_SCAN flag is available]) - ZFS_LINUX_TEST_RESULT([genhd_fl_no_part_scan], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART_SCAN, - [GENHD_FL_NO_PART_SCAN flag is available]) - ], [ - 
ZFS_LINUX_TEST_ERROR([GENHD_FL_NO_PART|GENHD_FL_NO_PART_SCAN]) - ]) ]) ]) diff --git a/config/kernel-get-link.m4 b/config/kernel-get-link.m4 deleted file mode 100644 index 1f8f5b0c8b72..000000000000 --- a/config/kernel-get-link.m4 +++ /dev/null @@ -1,104 +0,0 @@ -dnl # -dnl # Supported get_link() interfaces checked newest to oldest. -dnl # Note this interface used to be named follow_link. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_LINK], [ - ZFS_LINUX_TEST_SRC([inode_operations_get_link], [ - #include - static const char *get_link(struct dentry *de, struct inode *ip, - struct delayed_call *done) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_get_link_cookie], [ - #include - static const char *get_link(struct dentry *de, struct - inode *ip, void **cookie) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .get_link = get_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_follow_link], [ - #include - static const char *follow_link(struct dentry *de, - void **cookie) { return "symlink"; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .follow_link = follow_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_follow_link_nameidata], [ - #include - static void *follow_link(struct dentry *de, struct - nameidata *nd) { return (void *)NULL; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .follow_link = follow_link, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GET_LINK], [ - dnl # - dnl # 4.5 API change - dnl # The get_link interface has added a delayed done call and - dnl # used it to retire the put_link() interface. 
- dnl # - AC_MSG_CHECKING([whether iops->get_link() passes delayed]) - ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_LINK_DELAYED, 1, [iops->get_link() delayed]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.5 API change - dnl # The follow_link() interface has been replaced by - dnl # get_link() which behaves the same as before except: - dnl # - An inode is passed as a separate argument - dnl # - When called in RCU mode a NULL dentry is passed. - dnl # - AC_MSG_CHECKING([whether iops->get_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([inode_operations_get_link_cookie], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_LINK_COOKIE, 1, - [iops->get_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 4.2 API change - dnl # This kernel retired the nameidata structure. - dnl # - AC_MSG_CHECKING( - [whether iops->follow_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([inode_operations_follow_link], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_COOKIE, 1, - [iops->follow_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.32 API - dnl # - AC_MSG_CHECKING( - [whether iops->follow_link() passes nameidata]) - ZFS_LINUX_TEST_RESULT( - [inode_operations_follow_link_nameidata],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FOLLOW_LINK_NAMEIDATA, 1, - [iops->follow_link() nameidata]) - ],[ - ZFS_LINUX_TEST_ERROR([get_link]) - ]) - ]) - ]) - ]) -]) diff --git a/config/kernel-get-user-pages.m4 b/config/kernel-get-user-pages.m4 new file mode 100644 index 000000000000..f9d02b66a178 --- /dev/null +++ b/config/kernel-get-user-pages.m4 @@ -0,0 +1,179 @@ +dnl # +dnl # get_user_pages_unlocked() function was not available till 4.0. +dnl # In earlier kernels (< 4.0) get_user_pages() is available. 
+dnl # +dnl # 4.0 API change, +dnl # long get_user_pages_unlocked(struct task_struct *tsk, +dnl # struct mm_struct *mm, unsigned long start, unsigned long nr_pages, +dnl # int write, int force, struct page **pages) +dnl # +dnl # 4.8 API change, +dnl # long get_user_pages_unlocked(unsigned long start, +dnl # unsigned long nr_pages, int write, int force, struct page **pages) +dnl # +dnl # 4.9 API change, +dnl # long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, +dnl # struct page **pages, unsigned int gup_flags) +dnl # + +dnl# +dnl# Check available get_user_pages/_unlocked interfaces. +dnl# +AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_USER_PAGES], [ + ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_gup_flags], [ + #include + ], [ + unsigned long start = 0; + unsigned long nr_pages = 1; + unsigned int gup_flags = 0; + struct page **pages = NULL; + long ret __attribute__ ((unused)); + + ret = get_user_pages_unlocked(start, nr_pages, pages, + gup_flags); + ]) + + ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_write_flag], [ + #include + ], [ + unsigned long start = 0; + unsigned long nr_pages = 1; + int write = 0; + int force = 0; + long ret __attribute__ ((unused)); + struct page **pages = NULL; + + ret = get_user_pages_unlocked(start, nr_pages, write, force, + pages); + ]) + + ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_task_struct], [ + #include + ], [ + struct task_struct *tsk = NULL; + struct mm_struct *mm = NULL; + unsigned long start = 0; + unsigned long nr_pages = 1; + int write = 0; + int force = 0; + struct page **pages = NULL; + long ret __attribute__ ((unused)); + + ret = get_user_pages_unlocked(tsk, mm, start, nr_pages, write, + force, pages); + ]) + + ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_task_struct_gup_flags], [ + #include + ], [ + struct task_struct *tsk = NULL; + struct mm_struct *mm = NULL; + unsigned long start = 0; + unsigned long nr_pages = 1; + struct page **pages = NULL; + unsigned int gup_flags = 0; + long ret __attribute__ ((unused)); + + ret = 
get_user_pages_unlocked(tsk, mm, start, nr_pages, + pages, gup_flags); + ]) + + ZFS_LINUX_TEST_SRC([get_user_pages_task_struct], [ + #include + ], [ + struct task_struct *tsk = NULL; + struct mm_struct *mm = NULL; + struct vm_area_struct **vmas = NULL; + unsigned long start = 0; + unsigned long nr_pages = 1; + int write = 0; + int force = 0; + struct page **pages = NULL; + int ret __attribute__ ((unused)); + + ret = get_user_pages(tsk, mm, start, nr_pages, write, + force, pages, vmas); + ]) +]) + +dnl # +dnl # Supported get_user_pages/_unlocked interfaces checked newest to oldest. +dnl # We first check for get_user_pages_unlocked as that is available in +dnl # newer kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_GET_USER_PAGES], [ + dnl # + dnl # Current API (as of 4.9) of get_user_pages_unlocked + dnl # + AC_MSG_CHECKING([whether get_user_pages_unlocked() takes gup flags]) + ZFS_LINUX_TEST_RESULT([get_user_pages_unlocked_gup_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GET_USER_PAGES_UNLOCKED_GUP_FLAGS, 1, + [get_user_pages_unlocked() takes gup flags]) + ], [ + AC_MSG_RESULT(no) + + dnl # + dnl # 4.8 API change, get_user_pages_unlocked + dnl # + AC_MSG_CHECKING( + [whether get_user_pages_unlocked() takes write flag]) + ZFS_LINUX_TEST_RESULT([get_user_pages_unlocked_write_flag], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_GET_USER_PAGES_UNLOCKED_WRITE_FLAG, 1, + [get_user_pages_unlocked() takes write flag]) + ], [ + AC_MSG_RESULT(no) + + dnl # + dnl # 4.0-4.3, 4.5-4.7 API, get_user_pages_unlocked + dnl # + AC_MSG_CHECKING( + [whether get_user_pages_unlocked() takes task_struct]) + ZFS_LINUX_TEST_RESULT( + [get_user_pages_unlocked_task_struct], [ + AC_MSG_RESULT(yes) + AC_DEFINE( + HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT, 1, + [get_user_pages_unlocked() takes task_struct]) + ], [ + AC_MSG_RESULT(no) + + dnl # + dnl # 4.4 API, get_user_pages_unlocked + dnl # + AC_MSG_CHECKING( + [whether get_user_pages_unlocked() takes task_struct, gup_flags]) + ZFS_LINUX_TEST_RESULT( + 
[get_user_pages_unlocked_task_struct_gup_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE( + HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT_GUP_FLAGS, 1, + [get_user_pages_unlocked() takes task_struct, gup_flags]) + ], [ + AC_MSG_RESULT(no) + + dnl # + dnl # get_user_pages + dnl # + AC_MSG_CHECKING( + [whether get_user_pages() takes struct task_struct]) + ZFS_LINUX_TEST_RESULT( + [get_user_pages_task_struct], [ + AC_MSG_RESULT(yes) + AC_DEFINE( + HAVE_GET_USER_PAGES_TASK_STRUCT, 1, + [get_user_pages() takes task_struct]) + ], [ + dnl # + dnl # If we cannot map the user's + dnl # pages in then we cannot do + dnl # Direct I/O + dnl # + ZFS_LINUX_TEST_ERROR([Direct I/O]) + ]) + ]) + ]) + ]) + ]) +]) diff --git a/config/kernel-global_page_state.m4 b/config/kernel-global_page_state.m4 deleted file mode 100644 index 76f2bba202a1..000000000000 --- a/config/kernel-global_page_state.m4 +++ /dev/null @@ -1,128 +0,0 @@ -dnl # -dnl # 4.8 API change -dnl # -dnl # 75ef71840539 mm, vmstat: add infrastructure for per-node vmstats -dnl # 599d0c954f91 mm, vmscan: move LRU lists to node -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE], [ - ZFS_LINUX_TEST_SRC([global_node_page_state], [ - #include - #include - ],[ - (void) global_node_page_state(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_node_page_state() exists]) - ZFS_LINUX_TEST_RESULT([global_node_page_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1, - [global_node_page_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.14 API change -dnl # -dnl # c41f012ade0b mm: rename global_page_state to global_zone_page_state -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE], [ - ZFS_LINUX_TEST_SRC([global_zone_page_state], [ - #include - #include - ],[ - (void) global_zone_page_state(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE], [ - AC_MSG_CHECKING([whether global_zone_page_state() exists]) - 
ZFS_LINUX_TEST_RESULT([global_zone_page_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(ZFS_GLOBAL_ZONE_PAGE_STATE, 1, - [global_zone_page_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Create a define and autoconf variable for an enum member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_ENUM_MEMBER], [ - AC_MSG_CHECKING([whether enum $2 contains $1]) - AS_IF([AC_TRY_COMMAND( - "${srcdir}/scripts/enum-extract.pl" "$2" "$3" | grep -Eqx $1)],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(m4_join([_], [ZFS_ENUM], m4_toupper($2), $1), 1, - [enum $2 contains $1]) - m4_join([_], [ZFS_ENUM], m4_toupper($2), $1)=1 - ],[ - AC_MSG_RESULT([no]) - ]) -]) - -dnl # -dnl # Sanity check helpers -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR],[ - AC_MSG_RESULT(no) - AC_MSG_RESULT([$1 in either node_stat_item or zone_stat_item: $2]) - ZFS_LINUX_TEST_ERROR([global page state]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK], [ - enum_check_a="m4_join([_], [$ZFS_ENUM_NODE_STAT_ITEM], $1)" - enum_check_b="m4_join([_], [$ZFS_ENUM_ZONE_STAT_ITEM], $1)" - AS_IF([test -n "$enum_check_a" -a -n "$enum_check_b"],[ - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR([$1], [DUPLICATE]) - ]) - AS_IF([test -z "$enum_check_a" -a -z "$enum_check_b"],[ - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_ERROR([$1], [NOT FOUND]) - ]) -]) - -dnl # -dnl # Ensure the config tests are finding one and only one of each enum. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY], [ - AC_MSG_CHECKING([whether global_page_state enums are sane]) - - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_FILE_PAGES]) - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_ANON]) - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE_ENUM_CHECK([NR_INACTIVE_FILE]) - - AC_MSG_RESULT(yes) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE], [ - ZFS_AC_KERNEL_SRC_GLOBAL_NODE_PAGE_STATE - ZFS_AC_KERNEL_SRC_GLOBAL_ZONE_PAGE_STATE -]) - -dnl # -dnl # enum members in which we're interested -dnl # -AC_DEFUN([ZFS_AC_KERNEL_GLOBAL_PAGE_STATE], [ - ZFS_AC_KERNEL_GLOBAL_NODE_PAGE_STATE - ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE - - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], - [node_stat_item], [$LINUX/include/linux/mmzone.h]) - - ZFS_AC_KERNEL_ENUM_MEMBER([NR_FILE_PAGES], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_ANON], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - ZFS_AC_KERNEL_ENUM_MEMBER([NR_INACTIVE_FILE], - [zone_stat_item], [$LINUX/include/linux/mmzone.h]) - - ZFS_AC_KERNEL_GLOBAL_ZONE_PAGE_STATE_SANITY -]) diff --git a/config/kernel-group-info.m4 b/config/kernel-group-info.m4 deleted file mode 100644 index 6941d62da017..000000000000 --- a/config/kernel-group-info.m4 +++ /dev/null @@ -1,22 +0,0 @@ -dnl # -dnl # 4.9 API change -dnl # group_info changed from 2d array via >blocks to 1d array via ->gid -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_GROUP_INFO_GID], [ - ZFS_LINUX_TEST_SRC([group_info_gid], [ - #include - ],[ - struct group_info gi __attribute__ ((unused)) = {}; - gi.gid[0] = KGIDT_INIT(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_GROUP_INFO_GID], [ - AC_MSG_CHECKING([whether group_info->gid exists]) - ZFS_LINUX_TEST_RESULT([group_info_gid], [ - AC_MSG_RESULT(yes) 
- AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-hotplug.m4 b/config/kernel-hotplug.m4 deleted file mode 100644 index e796a6d2e8e8..000000000000 --- a/config/kernel-hotplug.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 4.6 API change -dnl # Added CPU hotplug APIs -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HOTPLUG], [ - ZFS_LINUX_TEST_SRC([cpu_hotplug], [ - #include - ],[ - enum cpuhp_state state = CPUHP_ONLINE; - int (*fp)(unsigned int, struct hlist_node *) = NULL; - cpuhp_state_add_instance_nocalls(0, (struct hlist_node *)NULL); - cpuhp_state_remove_instance_nocalls(0, (struct hlist_node *)NULL); - cpuhp_setup_state_multi(state, "", fp, fp); - cpuhp_remove_multi_state(0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_CPU_HOTPLUG], [ - AC_MSG_CHECKING([whether CPU hotplug APIs exist]) - ZFS_LINUX_TEST_RESULT([cpu_hotplug], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_CPU_HOTPLUG, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-in-compat-syscall.m4 b/config/kernel-in-compat-syscall.m4 deleted file mode 100644 index baaac8c4fda2..000000000000 --- a/config/kernel-in-compat-syscall.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.5 API change -dnl # Added in_compat_syscall() which can be overridden on a per- -dnl # architecture basis. Prior to this is_compat_task() was the -dnl # provided interface. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL], [ - ZFS_LINUX_TEST_SRC([in_compat_syscall], [ - #include - ],[ - in_compat_syscall(); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ - AC_MSG_CHECKING([whether in_compat_syscall() is available]) - ZFS_LINUX_TEST_RESULT([in_compat_syscall], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, - [in_compat_syscall() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4 index 5f7ce1ad9a5d..73b8213109fb 100644 --- a/config/kernel-inode-getattr.m4 +++ b/config/kernel-inode-getattr.m4 @@ -57,20 +57,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [ .getattr = test_getattr, }; ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_getattr_vfsmount], [ - #include - - static int test_getattr( - struct vfsmount *mnt, struct dentry *d, - struct kstat *k) - { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .getattr = test_getattr, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ @@ -105,18 +91,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [ [iops->getattr() takes a path]) ],[ AC_MSG_RESULT(no) - - dnl # - dnl # Kernel < 4.11 test - dnl # - AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount]) - ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1, - [iops->getattr() takes a vfsmount]) - ],[ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-inode-lock.m4 b/config/kernel-inode-lock.m4 deleted file mode 100644 index 5eb04af78771..000000000000 --- a/config/kernel-inode-lock.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 4.7 API change -dnl # i_mutex is changed to i_rwsem. Instead of directly using -dnl # i_mutex/i_rwsem, we should use inode_lock() and inode_lock_shared() -dnl # We test inode_lock_shared because inode_lock is introduced earlier. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_LOCK], [ - ZFS_LINUX_TEST_SRC([inode_lock], [ - #include - ],[ - struct inode *inode = NULL; - inode_lock_shared(inode); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_LOCK], [ - AC_MSG_CHECKING([whether inode_lock_shared() exists]) - ZFS_LINUX_TEST_RESULT([inode_lock], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_LOCK_SHARED, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4 index f7fc16439093..286f73bb047e 100644 --- a/config/kernel-inode-permission.m4 +++ b/config/kernel-inode-permission.m4 @@ -42,6 +42,8 @@ AC_DEFUN([ZFS_AC_KERNEL_PERMISSION], [ AC_DEFINE(HAVE_IOPS_PERMISSION_IDMAP, 1, [iops->permission() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether iops->permission() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([permission_userns], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-inode-set-flags.m4 b/config/kernel-inode-set-flags.m4 deleted file mode 100644 index 133f666a9517..000000000000 --- a/config/kernel-inode-set-flags.m4 +++ /dev/null @@ -1,22 +0,0 @@ -dnl # -dnl # 3.15 API change -dnl # inode_set_flags introduced to set i_flags -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS], [ - ZFS_LINUX_TEST_SRC([inode_set_flags], [ - #include - ],[ - struct inode inode; - inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [ - AC_MSG_CHECKING([whether inode_set_flags() exists]) - ZFS_LINUX_TEST_RESULT([inode_set_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_SET_FLAGS, 1, [inode_set_flags() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-set-iversion.m4 b/config/kernel-inode-set-iversion.m4 deleted file mode 100644 index dd415de324a7..000000000000 --- a/config/kernel-inode-set-iversion.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 4.16 API change -dnl # inode_set_iversion introduced to set i_version -dnl # 
-AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION], [ - ZFS_LINUX_TEST_SRC([inode_set_iversion], [ - #include - ],[ - struct inode inode; - inode_set_iversion(&inode, 1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_IVERSION], [ - AC_MSG_CHECKING([whether inode_set_iversion() exists]) - ZFS_LINUX_TEST_RESULT([inode_set_iversion], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_SET_IVERSION, 1, - [inode_set_iversion() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4 index 69289e897be6..9a12acc95a3f 100644 --- a/config/kernel-inode-setattr.m4 +++ b/config/kernel-inode-setattr.m4 @@ -37,19 +37,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [ .setattr = test_setattr, }; ],[]) - - ZFS_LINUX_TEST_SRC([inode_operations_setattr], [ - #include - - static int test_setattr( - struct dentry *de, struct iattr *ia) - { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .setattr = test_setattr, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [ @@ -73,15 +60,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [ [iops->setattr() takes struct user_namespace*]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->setattr() exists]) - ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOPS_SETATTR, 1, - [iops->setattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) ]) ]) ]) diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4 index 4d861596ed0b..59988e937929 100644 --- a/config/kernel-inode-times.m4 +++ b/config/kernel-inode-times.m4 @@ -14,20 +14,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [ ts = timestamp_truncate(ts, &ip); ]) - dnl # - dnl # 4.18 API change - dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64. 
- dnl # - ZFS_LINUX_TEST_SRC([inode_times], [ - #include - ],[ - struct inode ip; - struct timespec ts; - - memset(&ip, 0, sizeof(ip)); - ts = ip.i_mtime; - ]) - dnl # dnl # 6.6 API change dnl # i_ctime no longer directly accessible, must use @@ -106,15 +92,6 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [ AC_MSG_RESULT(no) ]) - AC_MSG_CHECKING([whether inode->i_*time's are timespec64]) - ZFS_LINUX_TEST_RESULT([inode_times], [ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1, - [inode->i_*time's are timespec64]) - ]) - AC_MSG_CHECKING([whether inode_get_ctime() exists]) ZFS_LINUX_TEST_RESULT([inode_get_ctime], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-kmem-cache.m4 b/config/kernel-kmem-cache.m4 deleted file mode 100644 index 0e9fe9eb2a90..000000000000 --- a/config/kernel-kmem-cache.m4 +++ /dev/null @@ -1,41 +0,0 @@ -dnl # -dnl # grsecurity API change, -dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by -dnl # kmem_cache_create_usercopy(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY], [ - ZFS_LINUX_TEST_SRC([kmem_cache_create_usercopy], [ - #include - static void ctor(void *foo) { /* fake ctor */ } - ],[ - struct kmem_cache *skc_linux_cache; - const char *name = "test"; - size_t size = 4096; - size_t align = 8; - unsigned long flags = 0; - size_t useroffset = 0; - size_t usersize = size - useroffset; - - skc_linux_cache = kmem_cache_create_usercopy( - name, size, align, flags, useroffset, usersize, ctor); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY], [ - AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists]) - ZFS_LINUX_TEST_RESULT([kmem_cache_create_usercopy], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1, - [kmem_cache_create_usercopy() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_KMEM_CACHE], [ - ZFS_AC_KERNEL_SRC_KMEM_CACHE_CREATE_USERCOPY -]) - -AC_DEFUN([ZFS_AC_KERNEL_KMEM_CACHE], [ - ZFS_AC_KERNEL_KMEM_CACHE_CREATE_USERCOPY -]) diff --git a/config/kernel-kmem.m4 b/config/kernel-kmem.m4 index 03c2a41fbdb2..f1c0d24125ce 100644 --- a/config/kernel-kmem.m4 +++ b/config/kernel-kmem.m4 @@ -57,31 +57,6 @@ AC_DEFUN([SPL_AC_DEBUG_KMEM_TRACKING], [ AC_MSG_RESULT([$enable_debug_kmem_tracking]) ]) -dnl # -dnl # 4.12 API, -dnl # Added kvmalloc allocation strategy -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [ - ZFS_LINUX_TEST_SRC([kvmalloc], [ - #include - #include - ],[ - void *p __attribute__ ((unused)); - - p = kvmalloc(0, GFP_KERNEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KVMALLOC], [ - AC_MSG_CHECKING([whether kvmalloc(ptr, flags) is available]) - ZFS_LINUX_TEST_RESULT([kvmalloc], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KVMALLOC, 1, [kvmalloc exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 5.8 API, dnl # __vmalloc PAGE_KERNEL removal @@ -106,4 +81,4 @@ AC_DEFUN([ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL], [ AC_MSG_RESULT(no) ]) ]) -- \ No newline at end of file +- diff --git 
a/config/kernel-kstrtoul.m4 b/config/kernel-kstrtoul.m4 deleted file mode 100644 index 8e4b542978a9..000000000000 --- a/config/kernel-kstrtoul.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 2.6.39 API change -dnl # Added kstrtoul() -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KSTRTOUL], [ - ZFS_LINUX_TEST_SRC([kstrtoul], [ - #include - ],[ - int ret __attribute__ ((unused)) = kstrtoul(NULL, 10, NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KSTRTOUL], [ - AC_MSG_CHECKING([whether kstrtoul() exists]) - ZFS_LINUX_TEST_RESULT([kstrtoul], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KSTRTOUL, 1, [kstrtoul() exists]) - ],[ - ZFS_LINUX_TEST_ERROR([kstrtoul()]) - ]) -]) diff --git a/config/kernel-ktime.m4 b/config/kernel-ktime.m4 deleted file mode 100644 index 64c3b5f90328..000000000000 --- a/config/kernel-ktime.m4 +++ /dev/null @@ -1,55 +0,0 @@ -dnl # -dnl # 4.18: ktime_get_coarse_real_ts64() replaces current_kernel_time64(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64], [ - ZFS_LINUX_TEST_SRC([ktime_get_coarse_real_ts64], [ - #include - ], [ - struct timespec64 ts; - ktime_get_coarse_real_ts64(&ts); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64], [ - AC_MSG_CHECKING([whether ktime_get_coarse_real_ts64() exists]) - ZFS_LINUX_TEST_RESULT([ktime_get_coarse_real_ts64], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_COARSE_REAL_TS64, 1, - [ktime_get_coarse_real_ts64() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.18: ktime_get_raw_ts64() replaces getrawmonotonic64(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64], [ - ZFS_LINUX_TEST_SRC([ktime_get_raw_ts64], [ - #include - ], [ - struct timespec64 ts; - ktime_get_raw_ts64(&ts); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME_GET_RAW_TS64], [ - AC_MSG_CHECKING([whether ktime_get_raw_ts64() exists]) - ZFS_LINUX_TEST_RESULT([ktime_get_raw_ts64], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KTIME_GET_RAW_TS64, 1, - [ktime_get_raw_ts64() exists]) - ], [ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_KTIME], [ - ZFS_AC_KERNEL_SRC_KTIME_GET_COARSE_REAL_TS64 - ZFS_AC_KERNEL_SRC_KTIME_GET_RAW_TS64 -]) - -AC_DEFUN([ZFS_AC_KERNEL_KTIME], [ - ZFS_AC_KERNEL_KTIME_GET_COARSE_REAL_TS64 - ZFS_AC_KERNEL_KTIME_GET_RAW_TS64 -]) diff --git a/config/kernel-lseek-execute.m4 b/config/kernel-lseek-execute.m4 deleted file mode 100644 index 652f611f8da4..000000000000 --- a/config/kernel-lseek-execute.m4 +++ /dev/null @@ -1,27 +0,0 @@ -dnl # -dnl # 3.11 API change -dnl # lseek_execute helper exported -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE], [ - ZFS_LINUX_TEST_SRC([lseek_execute], [ - #include - ], [ - struct file *fp __attribute__ ((unused)) = NULL; - struct inode *ip __attribute__ ((unused)) = NULL; - loff_t offset __attribute__ ((unused)) = 0; - loff_t maxsize __attribute__ ((unused)) = 0; - - lseek_execute(fp, ip, offset, maxsize); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_LSEEK_EXECUTE], [ - AC_MSG_CHECKING([whether lseek_execute() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([lseek_execute], - [lseek_exclusive], [fs/read_write.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_LSEEK_EXECUTE, 1, [lseek_execute() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-make-request-fn.m4 b/config/kernel-make-request-fn.m4 index 4c54bdd6d4a2..66d6a18cd976 100644 --- a/config/kernel-make-request-fn.m4 +++ b/config/kernel-make-request-fn.m4 @@ -2,14 +2,6 @@ dnl # dnl # Check for make_request_fn interface. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [ - ZFS_LINUX_TEST_SRC([make_request_fn_void], [ - #include - static void make_request(struct request_queue *q, - struct bio *bio) { return; } - ],[ - blk_queue_make_request(NULL, &make_request); - ]) - ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [ #include static blk_qc_t make_request(struct request_queue *q, @@ -197,36 +189,20 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ AC_MSG_RESULT(no) dnl # - dnl # Linux 3.2 API Change - dnl # make_request_fn returns void. + dnl # Linux 4.4 API Change + dnl # make_request_fn returns blk_qc_t. dnl # AC_MSG_CHECKING( - [whether make_request_fn() returns void]) - ZFS_LINUX_TEST_RESULT([make_request_fn_void], [ + [whether make_request_fn() returns blk_qc_t]) + ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, void, + AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, [make_request_fn() return type]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1, - [Noting that make_request_fn() returns void]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, + [Noting that make_request_fn() ] + [returns blk_qc_t]) ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # Linux 4.4 API Change - dnl # make_request_fn returns blk_qc_t. 
- dnl # - AC_MSG_CHECKING( - [whether make_request_fn() returns blk_qc_t]) - ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [ - AC_MSG_RESULT(yes) - AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t, - [make_request_fn() return type]) - AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1, - [Noting that make_request_fn() ] - [returns blk_qc_t]) - ],[ - ZFS_LINUX_TEST_ERROR([make_request_fn]) - ]) + ZFS_LINUX_TEST_ERROR([make_request_fn]) ]) ]) ]) diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4 index 367f100094d3..8e084443c7b4 100644 --- a/config/kernel-mkdir.m4 +++ b/config/kernel-mkdir.m4 @@ -68,6 +68,8 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1, [iops->mkdir() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + dnl # dnl # 5.12 API change dnl # The struct user_namespace arg was added as the first argument to @@ -80,15 +82,6 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [ [iops->mkdir() takes struct user_namespace*]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->mkdir() takes umode_t]) - ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_MKDIR_UMODE_T, 1, - [iops->mkdir() takes umode_t]) - ],[ - ZFS_LINUX_TEST_ERROR([mkdir()]) - ]) ]) ]) ]) diff --git a/config/kernel-mm-pagemap.m4 b/config/kernel-mm-pagemap.m4 index 466b6fa07d9a..def6f5f4b3aa 100644 --- a/config/kernel-mm-pagemap.m4 +++ b/config/kernel-mm-pagemap.m4 @@ -21,8 +21,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING], [ ZFS_LINUX_TEST_SRC([page_mapping], [ #include ],[ - struct page *p = NULL; - struct address_space *m = page_mapping(NULL); + struct address_space *m; + m = page_mapping(NULL); ]) ]) AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_MAPPING], [ diff --git a/config/kernel-percpu.m4 b/config/kernel-percpu.m4 index 5125dd5c5bb8..12e81892cb6b 100644 --- a/config/kernel-percpu.m4 +++ b/config/kernel-percpu.m4 @@ -1,55 +1,3 @@ -dnl # -dnl # 3.18 API change, -dnl # The function percpu_counter_init now must be passed a GFP mask. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT], [ - ZFS_LINUX_TEST_SRC([percpu_counter_init_with_gfp], [ - #include - #include - ],[ - struct percpu_counter counter; - int error; - - error = percpu_counter_init(&counter, 0, GFP_KERNEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [ - AC_MSG_CHECKING([whether percpu_counter_init() wants gfp_t]) - ZFS_LINUX_TEST_RESULT([percpu_counter_init_with_gfp], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PERCPU_COUNTER_INIT_WITH_GFP, 1, - [percpu_counter_init() wants gfp_t]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change, -dnl # __percpu_counter_add() was renamed to percpu_counter_add_batch(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH], [ - ZFS_LINUX_TEST_SRC([percpu_counter_add_batch], [ - #include - ],[ - struct percpu_counter counter; - - percpu_counter_add_batch(&counter, 1, 1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH], [ - AC_MSG_CHECKING([whether percpu_counter_add_batch() is defined]) - ZFS_LINUX_TEST_RESULT([percpu_counter_add_batch], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1, - [percpu_counter_add_batch() is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 5.10 API change, dnl # The "count" was moved into ref->data, from ref @@ -75,13 +23,9 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA], [ ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [ - ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT - ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA ]) AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [ - ZFS_AC_KERNEL_PERCPU_COUNTER_INIT - ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA ]) diff --git a/config/kernel-put-link.m4 b/config/kernel-put-link.m4 deleted file mode 100644 index 8ab318cbff8c..000000000000 --- a/config/kernel-put-link.m4 +++ /dev/null @@ -1,61 +0,0 @@ -dnl # -dnl # Supported symlink APIs -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_PUT_LINK], [ - 
ZFS_LINUX_TEST_SRC([put_link_cookie], [ - #include - static void put_link(struct inode *ip, void *cookie) - { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([put_link_nameidata], [ - #include - static void put_link(struct dentry *de, struct - nameidata *nd, void *ptr) { return; } - static struct inode_operations - iops __attribute__ ((unused)) = { - .put_link = put_link, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_PUT_LINK], [ - dnl # - dnl # 4.5 API change - dnl # get_link() uses delayed done, there is no put_link() interface. - dnl # This check initially uses the inode_operations_get_link result - dnl # - ZFS_LINUX_TEST_RESULT([inode_operations_get_link], [ - AC_DEFINE(HAVE_PUT_LINK_DELAYED, 1, [iops->put_link() delayed]) - ],[ - dnl # - dnl # 4.2 API change - dnl # This kernel retired the nameidata structure. - dnl # - AC_MSG_CHECKING([whether iops->put_link() passes cookie]) - ZFS_LINUX_TEST_RESULT([put_link_cookie], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_LINK_COOKIE, 1, - [iops->put_link() cookie]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 2.6.32 API - dnl # - AC_MSG_CHECKING( - [whether iops->put_link() passes nameidata]) - ZFS_LINUX_TEST_RESULT([put_link_nameidata], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_PUT_LINK_NAMEIDATA, 1, - [iops->put_link() nameidata]) - ],[ - ZFS_LINUX_TEST_ERROR([put_link]) - ]) - ]) - ]) -]) diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index ce881502d1b1..1c47222bdc30 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -1,23 +1,4 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ - dnl # - dnl # 3.9 (to 4.9) API change, - dnl # - dnl # A new version of iops->rename() was added (rename2) that takes a flag - dnl # argument (to support renameat2). However this separate function was - dnl # merged back into iops->rename() in Linux 4.9. 
- dnl # - ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ - #include - static int rename2_fn(struct inode *sip, struct dentry *sdp, - struct inode *tip, struct dentry *tdp, - unsigned int flags) { return 0; } - - static const struct inode_operations - iops __attribute__ ((unused)) = { - .rename2 = rename2_fn, - }; - ],[]) - dnl # dnl # 4.9 API change, dnl # @@ -36,24 +17,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ }; ],[]) - dnl # - dnl # EL7 compatibility - dnl # - dnl # EL7 has backported renameat2 support, but it's done by defining a - dnl # separate iops wrapper structure that takes the .renameat2 function. - dnl # - ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ - #include - static int rename2_fn(struct inode *sip, struct dentry *sdp, - struct inode *tip, struct dentry *tdp, - unsigned int flags) { return 0; } - - static const struct inode_operations_wrapper - iops __attribute__ ((unused)) = { - .rename2 = rename2_fn, - }; - ],[]) - dnl # dnl # 5.12 API change, dnl # @@ -95,6 +58,8 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1, [iops->rename() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [ AC_MSG_RESULT(yes) @@ -103,30 +68,13 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether iops->rename2() exists]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [ + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists]) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) ],[ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether iops->rename() wants flags]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, - [iops->rename() wants flags]) - ],[ - 
AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()]) - ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1, - [struct inode_operations_wrapper takes .rename2()]) - ],[ - AC_MSG_RESULT(no) - ]) - ]) ]) ]) ]) diff --git a/config/kernel-rw.m4 b/config/kernel-rw.m4 deleted file mode 100644 index 85b47d5c6fc2..000000000000 --- a/config/kernel-rw.m4 +++ /dev/null @@ -1,69 +0,0 @@ -dnl # -dnl # 4.14 API change -dnl # kernel_write() which was introduced in 3.9 was updated to take -dnl # the offset as a pointer which is needed by vn_rdwr(). -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITE], [ - ZFS_LINUX_TEST_SRC([kernel_write], [ - #include - ],[ - struct file *file = NULL; - const void *buf = NULL; - size_t count = 0; - loff_t *pos = NULL; - ssize_t ret; - - ret = kernel_write(file, buf, count, pos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WRITE], [ - AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer]) - ZFS_LINUX_TEST_RESULT([kernel_write], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1, - [kernel_write() take loff_t pointer]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.14 API change -dnl # kernel_read() which has existed for forever was updated to take -dnl # the offset as a pointer which is needed by vn_rdwr(). 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_READ], [ - ZFS_LINUX_TEST_SRC([kernel_read], [ - #include - ],[ - struct file *file = NULL; - void *buf = NULL; - size_t count = 0; - loff_t *pos = NULL; - ssize_t ret; - - ret = kernel_read(file, buf, count, pos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_READ], [ - AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer]) - ZFS_LINUX_TEST_RESULT([kernel_read], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1, - [kernel_read() take loff_t pointer]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_RW], [ - ZFS_AC_KERNEL_SRC_WRITE - ZFS_AC_KERNEL_SRC_READ -]) - -AC_DEFUN([ZFS_AC_KERNEL_RW], [ - ZFS_AC_KERNEL_WRITE - ZFS_AC_KERNEL_READ -]) diff --git a/config/kernel-rwsem.m4 b/config/kernel-rwsem.m4 deleted file mode 100644 index d3a64a8efa19..000000000000 --- a/config/kernel-rwsem.m4 +++ /dev/null @@ -1,60 +0,0 @@ -dnl # -dnl # 3.16 API Change -dnl # -dnl # rwsem-spinlock "->activity" changed to "->count" -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY], [ - ZFS_LINUX_TEST_SRC([rwsem_activity], [ - #include - ],[ - struct rw_semaphore dummy_semaphore __attribute__ ((unused)); - dummy_semaphore.activity = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ACTIVITY], [ - AC_MSG_CHECKING([whether struct rw_semaphore has member activity]) - ZFS_LINUX_TEST_RESULT([rwsem_activity], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1, - [struct rw_semaphore has member activity]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.8 API Change -dnl # -dnl # rwsem "->count" changed to atomic_long_t type -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT], [ - ZFS_LINUX_TEST_SRC([rwsem_atomic_long_count], [ - #include - ],[ - DECLARE_RWSEM(dummy_semaphore); - (void) atomic_long_read(&dummy_semaphore.count); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [ - AC_MSG_CHECKING( - [whether struct rw_semaphore has atomic_long_t member count]) - ZFS_LINUX_TEST_RESULT([rwsem_atomic_long_count], 
[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RWSEM_ATOMIC_LONG_COUNT, 1, - [struct rw_semaphore has atomic_long_t member count]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [ - ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY - ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT -]) - -AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [ - ZFS_AC_KERNEL_RWSEM_ACTIVITY - ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT -]) diff --git a/config/kernel-sched.m4 b/config/kernel-sched.m4 index 17e49fbdf472..8ef4cc6ee4cc 100644 --- a/config/kernel-sched.m4 +++ b/config/kernel-sched.m4 @@ -20,63 +20,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SCHED_RT_HEADER], [ ]) ]) -dnl # -dnl # 4.11 API change, -dnl # Moved things from linux/sched.h to linux/sched/signal.h -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER], [ - ZFS_LINUX_TEST_SRC([sched_signal_header], [ - #include - #include - ],[ - return 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER], [ - AC_MSG_CHECKING([whether header linux/sched/signal.h exists]) - ZFS_LINUX_TEST_RESULT([sched_signal_header], [ - AC_DEFINE(HAVE_SCHED_SIGNAL_HEADER, 1, - [linux/sched/signal.h exists]) - AC_MSG_RESULT(yes) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.19 API change -dnl # The io_schedule_timeout() function is present in all 2.6.32 kernels -dnl # but it was not exported until Linux 3.19. The RHEL 7.x kernels which -dnl # are based on a 3.10 kernel do export this symbol. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT], [ - ZFS_LINUX_TEST_SRC([io_schedule_timeout], [ - #include - ], [ - (void) io_schedule_timeout(1); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT], [ - AC_MSG_CHECKING([whether io_schedule_timeout() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([io_schedule_timeout], - [io_schedule_timeout], [], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IO_SCHEDULE_TIMEOUT, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_SCHED], [ ZFS_AC_KERNEL_SRC_SCHED_RT_HEADER - ZFS_AC_KERNEL_SRC_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_SRC_IO_SCHEDULE_TIMEOUT ]) AC_DEFUN([ZFS_AC_KERNEL_SCHED], [ ZFS_AC_KERNEL_SCHED_RT_HEADER - ZFS_AC_KERNEL_SCHED_SIGNAL_HEADER - ZFS_AC_KERNEL_IO_SCHEDULE_TIMEOUT ]) diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4 index e02d6263e9c9..b10ddafc054b 100644 --- a/config/kernel-setattr-prepare.m4 +++ b/config/kernel-setattr-prepare.m4 @@ -51,6 +51,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [ AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1, [setattr_prepare() accepts mnt_idmap]) ], [ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*]) ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns], [setattr_prepare], [fs/attr.c], [ diff --git a/config/kernel-shrink.m4 b/config/kernel-shrink.m4 index 6580b08d5ff2..c4258f4e40d6 100644 --- a/config/kernel-shrink.m4 +++ b/config/kernel-shrink.m4 @@ -58,31 +58,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK], [ ]) dnl # -dnl # 3.12 API change -dnl # The nid member was added to struct shrink_control to support -dnl # NUMA-aware shrinkers. 
+dnl # 6.0 API change +dnl # register_shrinker() becomes a var-arg function that takes +dnl # a printf-style format string as args > 0 dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID], [ - ZFS_LINUX_TEST_SRC([shrink_control_nid], [ - #include - ],[ - struct shrink_control sc __attribute__ ((unused)); - unsigned long scnidsize __attribute__ ((unused)) = - sizeof(sc.nid); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID], [ - AC_MSG_CHECKING([whether shrink_control has nid]) - ZFS_LINUX_TEST_RESULT([shrink_control_nid], [ - AC_MSG_RESULT(yes) - AC_DEFINE(SHRINK_CONTROL_HAS_NID, 1, - [struct shrink_control has nid]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ZFS_LINUX_TEST_SRC([register_shrinker_vararg], [ #include @@ -98,30 +77,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [ - ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control], [ - #include - static int shrinker_cb(struct shrinker *shrink, - struct shrink_control *sc) { return 0; } - ],[ - struct shrinker cache_shrinker = { - .shrink = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); - ]) - - ZFS_LINUX_TEST_SRC([shrinker_cb_shrink_control_split], [ - #include - static unsigned long shrinker_cb(struct shrinker *shrink, - struct shrink_control *sc) { return 0; } +AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SHRINKER_VARARG],[ + AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) + ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, + [register_shrinker is vararg]) ],[ - struct shrinker cache_shrinker = { - .count_objects = shrinker_cb, - .scan_objects = shrinker_cb, - .seeks = DEFAULT_SEEKS, - }; - register_shrinker(&cache_shrinker); + AC_MSG_RESULT(no) ]) ]) @@ -144,117 +107,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[ 
- dnl # - dnl # 6.0 API change - dnl # register_shrinker() becomes a var-arg function that takes - dnl # a printf-style format string as args > 0 - dnl # - AC_MSG_CHECKING([whether new var-arg register_shrinker() exists]) - ZFS_LINUX_TEST_RESULT([register_shrinker_vararg], [ +AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_REGISTER], [ + AC_MSG_CHECKING([whether shrinker_register() exists]) + ZFS_LINUX_TEST_RESULT([shrinker_register], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REGISTER_SHRINKER_VARARG, 1, - [register_shrinker is vararg]) - - dnl # We assume that the split shrinker callback exists if the - dnl # vararg register_shrinker() exists, because the latter is - dnl # a much more recent addition, and the macro test for the - dnl # var-arg version only works if the callback is split - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) - ],[ + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, [shrinker_register exists]) + ], [ AC_MSG_RESULT(no) - dnl # - dnl # 3.0 - 3.11 API change - dnl # cs->shrink(struct shrinker *, struct shrink_control *sc) - dnl # - AC_MSG_CHECKING([whether new 2-argument shrinker exists]) - ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SINGLE_SHRINKER_CALLBACK, 1, - [new shrinker callback wants 2 args]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 3.12 API change, - dnl # cs->shrink() is logically split in to - dnl # cs->count_objects() and cs->scan_objects() - dnl # - AC_MSG_CHECKING( - [whether cs->count_objects callback exists]) - ZFS_LINUX_TEST_RESULT( - [shrinker_cb_shrink_control_split],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1, - [cs->count_objects exists]) - ],[ - AC_MSG_RESULT(no) - - AC_MSG_CHECKING( - [whether shrinker_register exists]) - ZFS_LINUX_TEST_RESULT([shrinker_register], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, - [shrinker_register exists]) - - dnl # We assume that the split shrinker - dnl # callback exists if - dnl # shrinker_register() 
exists, - dnl # because the latter is a much more - dnl # recent addition, and the macro - dnl # test for shrinker_register() only - dnl # works if the callback is split - AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, - 1, [cs->count_objects exists]) - ],[ - AC_MSG_RESULT(no) - ZFS_LINUX_TEST_ERROR([shrinker]) - ]) - ]) - ]) - ]) -]) - -dnl # -dnl # 2.6.39 API change, -dnl # Shrinker adjust to use common shrink_control structure. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT], [ - ZFS_LINUX_TEST_SRC([shrink_control_struct], [ - #include - ],[ - struct shrink_control sc __attribute__ ((unused)); - - sc.nr_to_scan = 0; - sc.gfp_mask = GFP_KERNEL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT], [ - AC_MSG_CHECKING([whether struct shrink_control exists]) - ZFS_LINUX_TEST_RESULT([shrink_control_struct], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SHRINK_CONTROL_STRUCT, 1, - [struct shrink_control exists]) - ],[ - ZFS_LINUX_TEST_ERROR([shrink_control]) ]) ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER], [ ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_SHRINK_PTR - ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK - ZFS_AC_KERNEL_SRC_SHRINK_CONTROL_STRUCT ZFS_AC_KERNEL_SRC_REGISTER_SHRINKER_VARARG ZFS_AC_KERNEL_SRC_SHRINKER_REGISTER ]) AC_DEFUN([ZFS_AC_KERNEL_SHRINKER], [ ZFS_AC_KERNEL_SUPER_BLOCK_S_SHRINK - ZFS_AC_KERNEL_SHRINK_CONTROL_HAS_NID - ZFS_AC_KERNEL_SHRINKER_CALLBACK - ZFS_AC_KERNEL_SHRINK_CONTROL_STRUCT + ZFS_AC_KERNEL_REGISTER_SHRINKER_VARARG + ZFS_AC_KERNEL_SHRINKER_REGISTER ]) diff --git a/config/kernel-signal-stop.m4 b/config/kernel-signal-stop.m4 deleted file mode 100644 index 6cb86e7c4cde..000000000000 --- a/config/kernel-signal-stop.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 4.4 API change -dnl # Added kernel_signal_stop -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SIGNAL_STOP], [ - ZFS_LINUX_TEST_SRC([signal_stop], [ - #include - ],[ - kernel_signal_stop(); - ]) -]) - 
-AC_DEFUN([ZFS_AC_KERNEL_SIGNAL_STOP], [ - AC_MSG_CHECKING([whether signal_stop() exists]) - ZFS_LINUX_TEST_RESULT([signal_stop], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SIGNAL_STOP, 1, [signal_stop() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-special-state.m4 b/config/kernel-special-state.m4 deleted file mode 100644 index aa60aabebc43..000000000000 --- a/config/kernel-special-state.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 4.17 API change -dnl # Added set_special_state() function -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE], [ - ZFS_LINUX_TEST_SRC([set_special_state], [ - #include - ],[ - set_special_state(TASK_STOPPED); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SET_SPECIAL_STATE], [ - AC_MSG_CHECKING([whether set_special_state() exists]) - ZFS_LINUX_TEST_RESULT([set_special_state], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SET_SPECIAL_STATE, 1, [set_special_state() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-strlcpy.m4 b/config/kernel-strlcpy.m4 index c31cf52d78b0..d50b0035e9d9 100644 --- a/config/kernel-strlcpy.m4 +++ b/config/kernel-strlcpy.m4 @@ -1,6 +1,5 @@ dnl # -dnl # 6.8.x replaced strlcpy with strscpy. Check for both so we can provide -dnl # appropriate fallbacks. +dnl # 6.8 removed strlcpy. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ ZFS_LINUX_TEST_SRC([kernel_has_strlcpy], [ @@ -13,17 +12,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_STRLCPY], [ ]) ]) -AC_DEFUN([ZFS_AC_KERNEL_SRC_STRSCPY], [ - ZFS_LINUX_TEST_SRC([kernel_has_strscpy], [ - #include - ], [ - const char *src = "goodbye"; - char dst[32]; - ssize_t len; - len = strscpy(dst, src, sizeof (dst)); - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ AC_MSG_CHECKING([whether strlcpy() exists]) ZFS_LINUX_TEST_RESULT([kernel_has_strlcpy], [ @@ -34,14 +22,3 @@ AC_DEFUN([ZFS_AC_KERNEL_STRLCPY], [ AC_MSG_RESULT([no]) ]) ]) - -AC_DEFUN([ZFS_AC_KERNEL_STRSCPY], [ - AC_MSG_CHECKING([whether strscpy() exists]) - ZFS_LINUX_TEST_RESULT([kernel_has_strscpy], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_KERNEL_STRSCPY, 1, - [strscpy() exists]) - ], [ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-super-userns.m4 b/config/kernel-super-userns.m4 deleted file mode 100644 index 1ad35f2d19ba..000000000000 --- a/config/kernel-super-userns.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 4.8 API change -dnl # struct user_namespace was added to struct super_block as -dnl # super->s_user_ns member -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_SUPER_USER_NS], [ - ZFS_LINUX_TEST_SRC([super_user_ns], [ - #include - #include - ], [ - struct super_block super; - super.s_user_ns = (struct user_namespace *)NULL; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [ - AC_MSG_CHECKING([whether super_block->s_user_ns exists]) - ZFS_LINUX_TEST_RESULT([super_user_ns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_SUPER_USER_NS, 1, - [super_block->s_user_ns exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4 index 804fceab28f0..fb6d23f61cbf 100644 --- a/config/kernel-symlink.m4 +++ b/config/kernel-symlink.m4 @@ -41,6 +41,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [ AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1, [iops->symlink() takes struct mnt_idmap*]) ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether 
iops->symlink() takes struct user_namespace*]) ZFS_LINUX_TEST_RESULT([symlink_userns], [ AC_MSG_RESULT(yes) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 deleted file mode 100644 index c710e804be0b..000000000000 --- a/config/kernel-timer.m4 +++ /dev/null @@ -1,75 +0,0 @@ -dnl # 4.14-rc3 API change -dnl # https://lwn.net/Articles/735887/ -dnl # -dnl # Check if timer_list.func get passed a timer_list or an unsigned long -dnl # (older kernels). Also sanity check the from_timer() and timer_setup() -dnl # macros are available as well, since they will be used in the same newer -dnl # kernels that support the new timer_list.func signature. -dnl # -dnl # Also check for the existence of flags in struct timer_list, they were -dnl # added in 4.1-rc8 via 0eeda71bc30d. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_SETUP], [ - ZFS_LINUX_TEST_SRC([timer_setup], [ - #include - - struct my_task_timer { - struct timer_list timer; - int data; - }; - - static void task_expire(struct timer_list *tl) - { - struct my_task_timer *task_timer = - from_timer(task_timer, tl, timer); - task_timer->data = 42; - } - ],[ - struct my_task_timer task_timer; - timer_setup(&task_timer.timer, task_expire, 0); - ]) - - ZFS_LINUX_TEST_SRC([timer_list_function], [ - #include - static void task_expire(struct timer_list *tl) {} - ],[ - struct timer_list tl; - tl.function = task_expire; - ]) - - ZFS_LINUX_TEST_SRC([timer_list_flags], [ - #include - ],[ - struct timer_list tl; - tl.flags = 2; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_TIMER_SETUP], [ - AC_MSG_CHECKING([whether timer_setup() is available]) - ZFS_LINUX_TEST_RESULT([timer_setup], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_SETUP, 1, - [timer_setup() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether timer function expects timer_list]) - ZFS_LINUX_TEST_RESULT([timer_list_function], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST, 1, - [timer_list.function gets a timer_list]) - ],[ - 
AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether struct timer_list has flags]) - ZFS_LINUX_TEST_RESULT([timer_list_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KERNEL_TIMER_LIST_FLAGS, 1, - [struct timer_list has a flags member]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4 index 7439514186e4..a711d67ed558 100644 --- a/config/kernel-tmpfile.m4 +++ b/config/kernel-tmpfile.m4 @@ -59,23 +59,19 @@ AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [ AC_MSG_CHECKING([whether i_op->tmpfile() exists]) ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap]) ], [ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns]) AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ],[ ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists]) AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature]) ],[ ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11]) diff --git a/config/kernel-user-ns-inum.m4 b/config/kernel-user-ns-inum.m4 deleted file mode 100644 index 2207a4aa6921..000000000000 --- a/config/kernel-user-ns-inum.m4 +++ /dev/null @@ -1,23 +0,0 @@ -dnl # -dnl # 3.18 API change -dnl # struct user_namespace inum moved from .proc_inum to .ns.inum. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [ - ZFS_LINUX_TEST_SRC([user_ns_common_inum], [ - #include - ], [ - struct user_namespace uns; - uns.ns.inum = 0; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [ - AC_MSG_CHECKING([whether user_namespace->ns.inum exists]) - ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1, - [user_namespace->ns.inum exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-userns-capabilities.m4 b/config/kernel-userns-capabilities.m4 index 026503623a2b..f4e24fb1606a 100644 --- a/config/kernel-userns-capabilities.m4 +++ b/config/kernel-userns-capabilities.m4 @@ -19,33 +19,6 @@ AC_DEFUN([ZFS_AC_KERNEL_NS_CAPABLE], [ ]) ]) -dnl # -dnl # 4.10 API change -dnl # has_capability() was exported. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_HAS_CAPABILITY], [ - ZFS_LINUX_TEST_SRC([has_capability], [ - #include - ],[ - struct task_struct *task = NULL; - int cap = 0; - bool result __attribute__ ((unused)); - - result = has_capability(task, cap); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_HAS_CAPABILITY], [ - AC_MSG_CHECKING([whether has_capability() is available]) - ZFS_LINUX_TEST_RESULT_SYMBOL([has_capability], - [has_capability], [kernel/capability.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_HAS_CAPABILITY, 1, [has_capability() is available]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - dnl # dnl # 2.6.39 API change dnl # struct user_namespace was added to struct cred_t as cred->user_ns member diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 index 7b7b91f979f9..17605a13fdef 100644 --- a/config/kernel-vfs-direct_IO.m4 +++ b/config/kernel-vfs-direct_IO.m4 @@ -1,5 +1,5 @@ dnl # -dnl # Check for direct IO interfaces. +dnl # Check for Direct I/O interfaces. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ ZFS_LINUX_TEST_SRC([direct_io_iter], [ @@ -25,31 +25,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ .direct_IO = test_direct_IO, }; ],[]) - - ZFS_LINUX_TEST_SRC([direct_io_iter_rw_offset], [ - #include - - static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, - struct iov_iter *iter, loff_t offset) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([direct_io_iovec], [ - #include - - static ssize_t test_direct_IO(int rw, struct kiocb *kiocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ @@ -76,34 +51,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ ],[ AC_MSG_RESULT([no]) - - dnl # - dnl # Linux 3.16.x API change - dnl # - AC_MSG_CHECKING( - [whether aops->direct_IO() uses rw and offset]) - ZFS_LINUX_TEST_RESULT([direct_io_iter_rw_offset], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET, 1, - [aops->direct_IO() uses iov_iter with ] - [rw and offset]) - ],[ - AC_MSG_RESULT([no]) - - dnl # - dnl # Ancient Linux API (predates git) - dnl # - AC_MSG_CHECKING( - [whether aops->direct_IO() uses iovec]) - ZFS_LINUX_TEST_RESULT([direct_io_iovec], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1, - [aops->direct_IO() uses iovec]) - ],[ - ZFS_LINUX_TEST_ERROR([direct IO]) - AC_MSG_RESULT([no]) - ]) - ]) + ZFS_LINUX_TEST_ERROR([Direct I/O]) ]) ]) ]) diff --git a/config/kernel-vfs-extended-file_range.m4 b/config/kernel-vfs-extended-file_range.m4 deleted file mode 100644 index a2622313129e..000000000000 --- a/config/kernel-vfs-extended-file_range.m4 +++ /dev/null @@ -1,50 +0,0 @@ -dnl # -dnl # EL7 have backported copy_file_range and clone_file_range and -dnl # 
added them to an "extended" file_operations struct. -dnl # -dnl # We're testing for both functions in one here, because they will only -dnl # ever appear together and we don't want to match a similar method in -dnl # some future vendor kernel. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND], [ - ZFS_LINUX_TEST_SRC([vfs_file_operations_extend], [ - #include - - static ssize_t test_copy_file_range(struct file *src_file, - loff_t src_off, struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; (void) flags; - return (0); - } - - static int test_clone_file_range(struct file *src_file, - loff_t src_off, struct file *dst_file, loff_t dst_off, - u64 len) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; - return (0); - } - - static const struct file_operations_extend - fops __attribute__ ((unused)) = { - .kabi_fops = {}, - .copy_file_range = test_copy_file_range, - .clone_file_range = test_clone_file_range, - }; - ],[]) -]) -AC_DEFUN([ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND], [ - AC_MSG_CHECKING([whether file_operations_extend takes \ -.copy_file_range() and .clone_file_range()]) - ZFS_LINUX_TEST_RESULT([vfs_file_operations_extend], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_FILE_OPERATIONS_EXTEND, 1, - [file_operations_extend takes .copy_file_range() - and .clone_file_range()]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) diff --git a/config/kernel-vfs-file_range.m4 b/config/kernel-vfs-file_range.m4 index 8a5cbe2eeeed..936f7b4eba4c 100644 --- a/config/kernel-vfs-file_range.m4 +++ b/config/kernel-vfs-file_range.m4 @@ -19,36 +19,6 @@ dnl # dnl # 6.8: generic_copy_file_range() removed, replaced by dnl # splice_copy_file_range() dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [ - ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [ - #include - - static ssize_t test_copy_file_range(struct file *src_file, - loff_t src_off, 
struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) { - (void) src_file; (void) src_off; - (void) dst_file; (void) dst_off; - (void) len; (void) flags; - return (0); - } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .copy_file_range = test_copy_file_range, - }; - ],[]) -]) -AC_DEFUN([ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE], [ - AC_MSG_CHECKING([whether fops->copy_file_range() is available]) - ZFS_LINUX_TEST_RESULT([vfs_copy_file_range], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_COPY_FILE_RANGE, 1, - [fops->copy_file_range() is available]) - ],[ - AC_MSG_RESULT([no]) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE], [ ZFS_LINUX_TEST_SRC([generic_copy_file_range], [ #include diff --git a/config/kernel-vfs-getattr.m4 b/config/kernel-vfs-getattr.m4 deleted file mode 100644 index eb07853cc4b9..000000000000 --- a/config/kernel-vfs-getattr.m4 +++ /dev/null @@ -1,86 +0,0 @@ -dnl # -dnl # 4.11 API, a528d35e@torvalds/linux -dnl # vfs_getattr(const struct path *p, struct kstat *s, u32 m, unsigned int f) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_4args], [ - #include - ],[ - vfs_getattr((const struct path *)NULL, - (struct kstat *)NULL, - (u32)0, - (unsigned int)0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_4ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 4 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_4args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_4ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 4 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.9 API -dnl # vfs_getattr(struct path *p, struct kstat *s) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_2args], [ - #include - ],[ - vfs_getattr((struct path *) NULL, - (struct kstat *)NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_2ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 2 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_2args], [ - 
AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 2 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # <3.9 API -dnl # vfs_getattr(struct vfsmount *v, struct dentry *d, struct kstat *k) -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS], [ - ZFS_LINUX_TEST_SRC([vfs_getattr_3args], [ - #include - ],[ - vfs_getattr((struct vfsmount *)NULL, - (struct dentry *)NULL, - (struct kstat *)NULL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR_3ARGS], [ - AC_MSG_CHECKING([whether vfs_getattr() wants 3 args]) - ZFS_LINUX_TEST_RESULT([vfs_getattr_3args], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARGS_VFS_GETATTR, 1, - [vfs_getattr wants 3 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GETATTR], [ - ZFS_AC_KERNEL_SRC_VFS_GETATTR_4ARGS - ZFS_AC_KERNEL_SRC_VFS_GETATTR_2ARGS - ZFS_AC_KERNEL_SRC_VFS_GETATTR_3ARGS -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GETATTR], [ - ZFS_AC_KERNEL_VFS_GETATTR_4ARGS - ZFS_AC_KERNEL_VFS_GETATTR_2ARGS - ZFS_AC_KERNEL_VFS_GETATTR_3ARGS -]) diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4 index ff560ff3eef0..88c22a555d55 100644 --- a/config/kernel-vfs-iov_iter.m4 +++ b/config/kernel-vfs-iov_iter.m4 @@ -2,44 +2,6 @@ dnl # dnl # Check for available iov_iter functionality. 
dnl # AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ - ZFS_LINUX_TEST_SRC([iov_iter_types], [ - #include - #include - ],[ - int type __attribute__ ((unused)) = ITER_KVEC; - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_advance], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t advance = 512; - - iov_iter_advance(&iter, advance); - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_revert], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t revert = 512; - - iov_iter_revert(&iter, revert); - ]) - - ZFS_LINUX_TEST_SRC([iov_iter_fault_in_readable], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - size_t size = 512; - int error __attribute__ ((unused)); - - error = iov_iter_fault_in_readable(&iter, size); - ]) - ZFS_LINUX_TEST_SRC([fault_in_iov_iter_readable], [ #include #include @@ -51,38 +13,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ error = fault_in_iov_iter_readable(&iter, size); ]) - ZFS_LINUX_TEST_SRC([iov_iter_count], [ - #include + ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [ #include - ],[ + ], [ struct iov_iter iter = { 0 }; - size_t bytes __attribute__ ((unused)); + struct page **pages = NULL; + size_t maxsize = 4096; + unsigned maxpages = 1; + size_t start; + size_t ret __attribute__ ((unused)); - bytes = iov_iter_count(&iter); + ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages, + &start); ]) - ZFS_LINUX_TEST_SRC([copy_to_iter], [ - #include + ZFS_LINUX_TEST_SRC([iov_iter_get_pages], [ #include - ],[ + ], [ struct iov_iter iter = { 0 }; - char buf[512] = { 0 }; - size_t size = 512; - size_t bytes __attribute__ ((unused)); + struct page **pages = NULL; + size_t maxsize = 4096; + unsigned maxpages = 1; + size_t start; + size_t ret __attribute__ ((unused)); - bytes = copy_to_iter((const void *)&buf, size, &iter); - ]) - - ZFS_LINUX_TEST_SRC([copy_from_iter], [ - #include - #include - ],[ - struct iov_iter iter = { 0 }; - char buf[512] = { 0 }; - size_t size = 512; - size_t bytes __attribute__ ((unused)); - - bytes 
= copy_from_iter((void *)&buf, size, &iter); + ret = iov_iter_get_pages(&iter, pages, maxsize, maxpages, + &start); ]) ZFS_LINUX_TEST_SRC([iov_iter_type], [ @@ -105,85 +61,37 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ enable_vfs_iov_iter="yes" - AC_MSG_CHECKING([whether iov_iter types are available]) - ZFS_LINUX_TEST_RESULT([iov_iter_types], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_TYPES, 1, - [iov_iter types are available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether iov_iter_advance() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_advance], [ + AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) + ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_ADVANCE, 1, - [iov_iter_advance() is available]) + AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1, + [fault_in_iov_iter_readable() is available]) ],[ AC_MSG_RESULT(no) enable_vfs_iov_iter="no" ]) - AC_MSG_CHECKING([whether iov_iter_revert() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_revert], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_REVERT, 1, - [iov_iter_revert() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether iov_iter_fault_in_readable() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_fault_in_readable], [ + dnl # + dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2(). 
+ dnl # + AC_MSG_CHECKING([whether iov_iter_get_pages2() is available]) + ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_FAULT_IN_READABLE, 1, - [iov_iter_fault_in_readable() is available]) - ],[ + AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1, + [iov_iter_get_pages2() is available]) + ], [ AC_MSG_RESULT(no) - - AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) - ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ + AC_MSG_CHECKING([whether iov_iter_get_pages() is available]) + ZFS_LINUX_TEST_RESULT([iov_iter_get_pages], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1, - [fault_in_iov_iter_readable() is available]) - ],[ + AC_DEFINE(HAVE_IOV_ITER_GET_PAGES, 1, + [iov_iter_get_pages() is available]) + ], [ AC_MSG_RESULT(no) enable_vfs_iov_iter="no" ]) ]) - AC_MSG_CHECKING([whether iov_iter_count() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_count], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_COUNT, 1, - [iov_iter_count() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether copy_to_iter() is available]) - ZFS_LINUX_TEST_RESULT([copy_to_iter], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COPY_TO_ITER, 1, - [copy_to_iter() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - - AC_MSG_CHECKING([whether copy_from_iter() is available]) - ZFS_LINUX_TEST_RESULT([copy_from_iter], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_COPY_FROM_ITER, 1, - [copy_from_iter() is available]) - ],[ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) - dnl # dnl # This checks for iov_iter_type() in linux/uio.h. 
It is not dnl # required, however, and the module will compiled without it diff --git a/config/kernel-vfs-iterate.m4 b/config/kernel-vfs-iterate.m4 deleted file mode 100644 index 2e396daa1c0f..000000000000 --- a/config/kernel-vfs-iterate.m4 +++ /dev/null @@ -1,83 +0,0 @@ -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_ITERATE], [ - ZFS_LINUX_TEST_SRC([file_operations_iterate_shared], [ - #include - static int iterate(struct file *filp, struct dir_context * context) - { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate_shared = iterate, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([file_operations_iterate], [ - #include - static int iterate(struct file *filp, - struct dir_context *context) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .iterate = iterate, - }; - - #if defined(FMODE_KABI_ITERATE) - #error "RHEL 7.5, FMODE_KABI_ITERATE interface" - #endif - ],[]) - - ZFS_LINUX_TEST_SRC([file_operations_readdir], [ - #include - static int readdir(struct file *filp, void *entry, - filldir_t func) { return 0; } - - static const struct file_operations fops - __attribute__ ((unused)) = { - .readdir = readdir, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_ITERATE], [ - dnl # - dnl # 4.7 API change - dnl # - AC_MSG_CHECKING([whether fops->iterate_shared() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_iterate_shared], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_ITERATE_SHARED, 1, - [fops->iterate_shared() is available]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # 3.11 API change - dnl # - dnl # RHEL 7.5 compatibility; the fops.iterate() method was - dnl # added to the file_operations structure but in order to - dnl # maintain KABI compatibility all callers must set - dnl # FMODE_KABI_ITERATE which is checked in iterate_dir(). - dnl # When detected ignore this interface and fallback to - dnl # to using fops.readdir() to retain KABI compatibility. 
- dnl # - AC_MSG_CHECKING([whether fops->iterate() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_iterate], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_ITERATE, 1, - [fops->iterate() is available]) - ],[ - AC_MSG_RESULT(no) - - dnl # - dnl # readdir interface introduced - dnl # - AC_MSG_CHECKING([whether fops->readdir() is available]) - ZFS_LINUX_TEST_RESULT([file_operations_readdir], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_READDIR, 1, - [fops->readdir() is available]) - ],[ - ZFS_LINUX_TEST_ERROR([vfs_iterate]) - ]) - ]) - ]) -]) diff --git a/config/kernel-vfs-rw-iterate.m4 b/config/kernel-vfs-rw-iterate.m4 deleted file mode 100644 index cb20ed03099a..000000000000 --- a/config/kernel-vfs-rw-iterate.m4 +++ /dev/null @@ -1,80 +0,0 @@ -dnl # -dnl # Linux 3.16 API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE], [ - ZFS_LINUX_TEST_SRC([file_operations_rw], [ - #include - - static ssize_t test_read(struct kiocb *kiocb, struct iov_iter *to) - { return 0; } - static ssize_t test_write(struct kiocb *kiocb, struct iov_iter *from) - { return 0; } - - static const struct file_operations - fops __attribute__ ((unused)) = { - .read_iter = test_read, - .write_iter = test_write, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([new_sync_rw], [ - #include - ],[ - ssize_t ret __attribute__ ((unused)); - struct file *filp = NULL; - char __user *rbuf = NULL; - const char __user *wbuf = NULL; - size_t len = 0; - loff_t ppos; - - ret = new_sync_read(filp, rbuf, len, &ppos); - ret = new_sync_write(filp, wbuf, len, &ppos); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_RW_ITERATE], [ - AC_MSG_CHECKING([whether fops->read/write_iter() are available]) - ZFS_LINUX_TEST_RESULT([file_operations_rw], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_VFS_RW_ITERATE, 1, - [fops->read/write_iter() are available]) - - dnl # - dnl # Linux 4.1 API - dnl # - AC_MSG_CHECKING([whether new_sync_read/write() are available]) - ZFS_LINUX_TEST_RESULT([new_sync_rw], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_NEW_SYNC_READ, 
1, - [new_sync_read()/new_sync_write() are available]) - ],[ - AC_MSG_RESULT(no) - ]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Linux 4.1.x API -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS], [ - ZFS_LINUX_TEST_SRC([generic_write_checks], [ - #include - ],[ - struct kiocb *iocb = NULL; - struct iov_iter *iov = NULL; - generic_write_checks(iocb, iov); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS], [ - AC_MSG_CHECKING([whether generic_write_checks() takes kiocb]) - ZFS_LINUX_TEST_RESULT([generic_write_checks], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_WRITE_CHECKS_KIOCB, 1, - [generic_write_checks() takes kiocb]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-wait.m4 b/config/kernel-wait.m4 deleted file mode 100644 index 0414242bf6d4..000000000000 --- a/config/kernel-wait.m4 +++ /dev/null @@ -1,99 +0,0 @@ -dnl # -dnl # 4.13 API change -dnl # Renamed struct wait_queue -> struct wait_queue_entry. -dnl # -dnl # N.B. The type check is performed before all other checks -dnl # since ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY depends on -dnl # HAVE_WAIT_QUEUE_ENTRY_T being set in confdefs.h. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T], [ - AC_MSG_CHECKING([whether wait_queue_entry_t exists]) - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - wait_queue_entry_t *entry __attribute__ ((unused)); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY_T, 1, - [wait_queue_entry_t exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 3.17 API change, -dnl # wait_on_bit() no longer requires an action argument. The former -dnl # "wait_on_bit" interface required an 'action' function to be provided -dnl # which does the actual waiting. There were over 20 such functions in the -dnl # kernel, many of them identical, though most cases can be satisfied by one -dnl # of just two functions: one which uses io_schedule() and one which just -dnl # uses schedule(). 
This API change was made to consolidate all of those -dnl # redundant wait functions. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_ON_BIT], [ - ZFS_LINUX_TEST_SRC([wait_on_bit], [ - #include - ],[ - int (*action)(void *) = NULL; - wait_on_bit(NULL, 0, action, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT_ON_BIT], [ - AC_MSG_CHECKING([whether wait_on_bit() takes an action]) - ZFS_LINUX_TEST_RESULT([wait_on_bit], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_ON_BIT_ACTION, 1, [yes]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # 4.13 API change -dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head -dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY], [ - ZFS_LINUX_TEST_SRC([wait_queue_head_entry], [ - #include - - #ifdef HAVE_WAIT_QUEUE_ENTRY_T - typedef wait_queue_head_t spl_wait_queue_head_t; - typedef wait_queue_entry_t spl_wait_queue_entry_t; - #else - typedef wait_queue_head_t spl_wait_queue_head_t; - typedef wait_queue_t spl_wait_queue_entry_t; - #endif - ],[ - spl_wait_queue_head_t wq_head; - spl_wait_queue_entry_t wq_entry; - struct list_head *head __attribute__ ((unused)); - struct list_head *entry __attribute__ ((unused)); - - head = &wq_head.head; - entry = &wq_entry.entry; - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY], [ - AC_MSG_CHECKING([whether wq_head->head and wq_entry->entry exist]) - ZFS_LINUX_TEST_RESULT([wait_queue_head_entry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_WAIT_QUEUE_HEAD_ENTRY, 1, - [wq_head->head and wq_entry->entry exist]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_SRC_WAIT], [ - ZFS_AC_KERNEL_SRC_WAIT_ON_BIT - ZFS_AC_KERNEL_SRC_WAIT_QUEUE_HEAD_ENTRY -]) - -AC_DEFUN([ZFS_AC_KERNEL_WAIT], [ - ZFS_AC_KERNEL_WAIT_ON_BIT - ZFS_AC_KERNEL_WAIT_QUEUE_HEAD_ENTRY -]) diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4 index 32f58c70a500..d933cff7a4b9 100644 --- a/config/kernel-xattr-handler.m4 +++ 
b/config/kernel-xattr-handler.m4 @@ -34,73 +34,11 @@ AC_DEFUN([ZFS_AC_KERNEL_CONST_XATTR_HANDLER], [ ]) dnl # -dnl # 4.5 API change, -dnl # struct xattr_handler added new member "name". -dnl # xattr_handler which matches to whole name rather than prefix should use -dnl # "name" instead of "prefix", e.g. "system.posix_acl_access" +dnl # Android API change, +dnl # The xattr_handler->get() callback was +dnl # changed to take dentry, inode and flags. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME], [ - ZFS_LINUX_TEST_SRC([xattr_handler_name], [ - #include - - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_NAME], [ - AC_MSG_CHECKING([whether xattr_handler has name]) - ZFS_LINUX_TEST_RESULT([xattr_handler_name], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_HANDLER_NAME, 1, - [xattr_handler has name]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -dnl # -dnl # Supported xattr handler get() interfaces checked newest to oldest. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ - ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode], [ - #include - - static int get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_get_xattr_handler], [ - #include - - static int get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry], [ - #include - - static int get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int handler_flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .get = get, - }; - ],[]) - +AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS], [ ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [ #include @@ -115,63 +53,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [ ],[]) ]) -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [ - dnl # - dnl # 4.7 API change, - dnl # The xattr_handler->get() callback was changed to take both - dnl # dentry and inode. 
- dnl # - AC_MSG_CHECKING([whether xattr_handler->get() wants dentry and inode]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode], [ +AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS], [ + AC_MSG_RESULT(no) + AC_MSG_CHECKING( + [whether xattr_handler->get() wants dentry and inode and flags]) + ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE, 1, - [xattr_handler->get() wants both dentry and inode]) + AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1, + [xattr_handler->get() wants dentry and inode and flags]) ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->get() callback was changed to take a - dnl # attr_handler, and handler_flags argument was removed and - dnl # should be accessed by handler->flags. - dnl # AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_HANDLER, 1, - [xattr_handler->get() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->get() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1, - [xattr_handler->get() wants dentry]) - ],[ - dnl # - dnl # Android API change, - dnl # The xattr_handler->get() callback was - dnl # changed to take dentry, inode and flags. 
- dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->get() wants dentry and inode and flags]) - ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1, - [xattr_handler->get() wants dentry and inode and flags]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr get()]) - ]) - ]) - ]) ]) ]) @@ -222,31 +113,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [ .set = set, }; ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_set_xattr_handler], [ - #include - - static int set(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags) - { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry], [ - #include - - static int set(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .set = set, - }; - ],[]) ]) AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ @@ -264,6 +130,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1, [xattr_handler->set() takes mnt_idmap]) ], [ + AC_MSG_RESULT(no) AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace]) ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [ AC_MSG_RESULT(yes) @@ -282,152 +149,12 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [ AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1, [xattr_handler->set() wants both dentry and inode]) ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->set() callback was changed to take a - dnl # xattr_handler, and handler_flags argument was removed and - dnl # should be accessed by handler->flags. 
- dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->set() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1, - [xattr_handler->set() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->set() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->set() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1, - [xattr_handler->set() wants dentry]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr set()]) - ]) - ]) + ZFS_LINUX_TEST_ERROR([xattr set()]) ]) ]) ]) ]) -dnl # -dnl # Supported xattr handler list() interfaces checked newest to oldest. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST], [ - ZFS_LINUX_TEST_SRC([xattr_handler_list_simple], [ - #include - - static bool list(struct dentry *dentry) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_list_xattr_handler], [ - #include - - static size_t list(const struct xattr_handler *handler, - struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([xattr_handler_list_dentry], [ - #include - - static size_t list(struct dentry *dentry, - char *list, size_t list_size, - const char *name, size_t name_len, - int handler_flags) { return 0; } - static const struct xattr_handler - xops __attribute__ ((unused)) = { - .list = list, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [ - dnl # 4.5 API change, - dnl # The xattr_handler->list() callback was changed to take only a - dnl # dentry and it only 
needs to return if it's accessible. - AC_MSG_CHECKING([whether xattr_handler->list() wants simple]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_simple], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_SIMPLE, 1, - [xattr_handler->list() wants simple]) - ],[ - dnl # - dnl # 4.4 API change, - dnl # The xattr_handler->list() callback was changed to take a - dnl # xattr_handler, and handler_flags argument was removed - dnl # and should be accessed by handler->flags. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->list() wants xattr_handler]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_xattr_handler], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_HANDLER, 1, - [xattr_handler->list() wants xattr_handler]) - ],[ - dnl # - dnl # 2.6.33 API change, - dnl # The xattr_handler->list() callback was changed - dnl # to take a dentry instead of an inode, and a - dnl # handler_flags argument was added. - dnl # - AC_MSG_RESULT(no) - AC_MSG_CHECKING( - [whether xattr_handler->list() wants dentry]) - ZFS_LINUX_TEST_RESULT([xattr_handler_list_dentry], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_XATTR_LIST_DENTRY, 1, - [xattr_handler->list() wants dentry]) - ],[ - ZFS_LINUX_TEST_ERROR([xattr list()]) - ]) - ]) - ]) -]) - -dnl # -dnl # 3.7 API change, -dnl # The posix_acl_{from,to}_xattr functions gained a new -dnl # parameter: user_ns -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS], [ - ZFS_LINUX_TEST_SRC([posix_acl_from_xattr_userns], [ - #include - #include - #include - ],[ - posix_acl_from_xattr(&init_user_ns, NULL, 0); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS], [ - AC_MSG_CHECKING([whether posix_acl_from_xattr() needs user_ns]) - ZFS_LINUX_TEST_RESULT([posix_acl_from_xattr_userns], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_POSIX_ACL_FROM_XATTR_USERNS, 1, - [posix_acl_from_xattr() needs user_ns]) - ],[ - ZFS_LINUX_TEST_ERROR([posix_acl_from_xattr()]) - ]) -]) - dnl # dnl # 4.9 API change, dnl # 
iops->{set,get,remove}xattr and generic_{set,get,remove}xattr are @@ -445,33 +172,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR], [ ],[]) ]) -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_SETXATTR], [ - AC_MSG_CHECKING([whether generic_setxattr() exists]) - ZFS_LINUX_TEST_RESULT([have_generic_setxattr], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GENERIC_SETXATTR, 1, - [generic_setxattr() exists]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR], [ ZFS_AC_KERNEL_SRC_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET + ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET - ZFS_AC_KERNEL_SRC_XATTR_HANDLER_LIST - ZFS_AC_KERNEL_SRC_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_SRC_GENERIC_SETXATTR ]) AC_DEFUN([ZFS_AC_KERNEL_XATTR], [ ZFS_AC_KERNEL_CONST_XATTR_HANDLER - ZFS_AC_KERNEL_XATTR_HANDLER_NAME - ZFS_AC_KERNEL_XATTR_HANDLER_GET + ZFS_AC_KERNEL_XATTR_HANDLER_GET_DENTRY_INODE_FLAGS ZFS_AC_KERNEL_XATTR_HANDLER_SET - ZFS_AC_KERNEL_XATTR_HANDLER_LIST - ZFS_AC_KERNEL_POSIX_ACL_FROM_XATTR_USERNS - ZFS_AC_KERNEL_GENERIC_SETXATTR ]) diff --git a/config/kernel-zlib.m4 b/config/kernel-zlib.m4 deleted file mode 100644 index 752d388389cb..000000000000 --- a/config/kernel-zlib.m4 +++ /dev/null @@ -1,26 +0,0 @@ -dnl # -dnl # 2.6.39 API compat, -dnl -dnl # The function zlib_deflate_workspacesize() now take 2 arguments. -dnl # This was done to avoid always having to allocate the maximum size -dnl # workspace (268K). The caller can now specific the windowBits and -dnl # memLevel compression parameters to get a smaller workspace. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ - ZFS_LINUX_TEST_SRC([2args_zlib_deflate_workspacesize], [ - #include - ],[ - return zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL); - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE], [ - AC_MSG_CHECKING([whether zlib_deflate_workspacesize() wants 2 args]) - ZFS_LINUX_TEST_RESULT([2args_zlib_deflate_workspacesize], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE, 1, - [zlib_deflate_workspacesize() wants 2 args]) - ],[ - ZFS_LINUX_TEST_ERROR([zlib_deflate_workspacesize()]) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 4d471358d242..761fea5fe121 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -14,7 +14,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ dnl # Sequential ZFS_LINUX_TRY_COMPILE tests ZFS_AC_KERNEL_FPU_HEADER ZFS_AC_KERNEL_OBJTOOL_HEADER - ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T ZFS_AC_KERNEL_MISC_MINOR ZFS_AC_KERNEL_DECLARE_EVENT_CLASS @@ -39,26 +38,13 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TYPES ZFS_AC_KERNEL_SRC_OBJTOOL - ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE ZFS_AC_KERNEL_SRC_PDE_DATA - ZFS_AC_KERNEL_SRC_FALLOCATE - ZFS_AC_KERNEL_SRC_FADVISE ZFS_AC_KERNEL_SRC_GENERIC_FADVISE - ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - ZFS_AC_KERNEL_SRC_RWSEM ZFS_AC_KERNEL_SRC_SCHED ZFS_AC_KERNEL_SRC_USLEEP_RANGE - ZFS_AC_KERNEL_SRC_KMEM_CACHE - ZFS_AC_KERNEL_SRC_KVMALLOC ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL - ZFS_AC_KERNEL_SRC_WAIT ZFS_AC_KERNEL_SRC_INODE_TIMES - ZFS_AC_KERNEL_SRC_INODE_LOCK - ZFS_AC_KERNEL_SRC_GROUP_INFO_GID - ZFS_AC_KERNEL_SRC_RW - ZFS_AC_KERNEL_SRC_TIMER_SETUP - ZFS_AC_KERNEL_SRC_SUPER_USER_NS ZFS_AC_KERNEL_SRC_PROC_OPERATIONS ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_SRC_BIO @@ -67,63 +53,41 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_GENHD_FLAGS ZFS_AC_KERNEL_SRC_REVALIDATE_DISK ZFS_AC_KERNEL_SRC_GET_DISK_RO - 
ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_SRC_XATTR ZFS_AC_KERNEL_SRC_ACL ZFS_AC_KERNEL_SRC_INODE_SETATTR ZFS_AC_KERNEL_SRC_INODE_GETATTR - ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS - ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION ZFS_AC_KERNEL_SRC_SHOW_OPTIONS - ZFS_AC_KERNEL_SRC_FILE_INODE - ZFS_AC_KERNEL_SRC_FILE_DENTRY - ZFS_AC_KERNEL_SRC_FSYNC - ZFS_AC_KERNEL_SRC_AIO_FSYNC - ZFS_AC_KERNEL_SRC_EVICT_INODE - ZFS_AC_KERNEL_SRC_DIRTY_INODE ZFS_AC_KERNEL_SRC_SHRINKER ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE ZFS_AC_KERNEL_SRC_PERMISSION - ZFS_AC_KERNEL_SRC_GET_LINK - ZFS_AC_KERNEL_SRC_PUT_LINK ZFS_AC_KERNEL_SRC_TMPFILE ZFS_AC_KERNEL_SRC_AUTOMOUNT - ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE ZFS_AC_KERNEL_SRC_COMMIT_METADATA - ZFS_AC_KERNEL_SRC_CLEAR_INODE ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_SRC_DENTRY - ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT - ZFS_AC_KERNEL_SRC_BDI ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET - ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO - ZFS_AC_KERNEL_SRC_VFS_GETATTR ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_SRC_VFS_ITERATE + ZFS_AC_KERNEL_SRC_GET_USER_PAGES ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS - ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE - ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER - ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE - ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS 
ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE @@ -134,36 +98,25 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_RENAME - ZFS_AC_KERNEL_SRC_CURRENT_TIME - ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES - ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_SRC_KTIME ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES - ZFS_AC_KERNEL_SRC_KSTRTOUL ZFS_AC_KERNEL_SRC_PERCPU - ZFS_AC_KERNEL_SRC_CPU_HOTPLUG ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR ZFS_AC_KERNEL_SRC_MKNOD ZFS_AC_KERNEL_SRC_SYMLINK ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS - ZFS_AC_KERNEL_SRC_SIGNAL_STOP ZFS_AC_KERNEL_SRC_SIGINFO ZFS_AC_KERNEL_SRC_SYSFS - ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_SRC_STRLCPY - ZFS_AC_KERNEL_SRC_STRSCPY ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_SRC_ADD_DISK ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC - ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS ZFS_AC_KERNEL_SRC_IATTR_VFSID - ZFS_AC_KERNEL_SRC_FILEMAP ZFS_AC_KERNEL_SRC_WRITEPAGE_T ZFS_AC_KERNEL_SRC_RECLAIMED ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE @@ -194,26 +147,13 @@ dnl # AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TYPES ZFS_AC_KERNEL_ACCESS_OK_TYPE - ZFS_AC_KERNEL_GLOBAL_PAGE_STATE ZFS_AC_KERNEL_OBJTOOL ZFS_AC_KERNEL_PDE_DATA - ZFS_AC_KERNEL_FALLOCATE - ZFS_AC_KERNEL_FADVISE ZFS_AC_KERNEL_GENERIC_FADVISE - ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE - ZFS_AC_KERNEL_RWSEM ZFS_AC_KERNEL_SCHED ZFS_AC_KERNEL_USLEEP_RANGE - ZFS_AC_KERNEL_KMEM_CACHE - ZFS_AC_KERNEL_KVMALLOC ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL - ZFS_AC_KERNEL_WAIT ZFS_AC_KERNEL_INODE_TIMES - ZFS_AC_KERNEL_INODE_LOCK - ZFS_AC_KERNEL_GROUP_INFO_GID - ZFS_AC_KERNEL_RW - ZFS_AC_KERNEL_TIMER_SETUP - ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_PROC_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS 
ZFS_AC_KERNEL_BIO @@ -222,63 +162,41 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_GENHD_FLAGS ZFS_AC_KERNEL_REVALIDATE_DISK ZFS_AC_KERNEL_GET_DISK_RO - ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE ZFS_AC_KERNEL_XATTR ZFS_AC_KERNEL_ACL ZFS_AC_KERNEL_INODE_SETATTR ZFS_AC_KERNEL_INODE_GETATTR - ZFS_AC_KERNEL_INODE_SET_FLAGS - ZFS_AC_KERNEL_INODE_SET_IVERSION ZFS_AC_KERNEL_SHOW_OPTIONS - ZFS_AC_KERNEL_FILE_INODE - ZFS_AC_KERNEL_FILE_DENTRY - ZFS_AC_KERNEL_FSYNC - ZFS_AC_KERNEL_AIO_FSYNC - ZFS_AC_KERNEL_EVICT_INODE - ZFS_AC_KERNEL_DIRTY_INODE ZFS_AC_KERNEL_SHRINKER ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE ZFS_AC_KERNEL_PERMISSION - ZFS_AC_KERNEL_GET_LINK - ZFS_AC_KERNEL_PUT_LINK ZFS_AC_KERNEL_TMPFILE ZFS_AC_KERNEL_AUTOMOUNT - ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE ZFS_AC_KERNEL_COMMIT_METADATA - ZFS_AC_KERNEL_CLEAR_INODE ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED - ZFS_AC_KERNEL_DENTRY - ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT - ZFS_AC_KERNEL_BDI ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET - ZFS_AC_KERNEL_LSEEK_EXECUTE ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO ZFS_AC_KERNEL_VFS_READ_FOLIO - ZFS_AC_KERNEL_VFS_GETATTR ZFS_AC_KERNEL_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_VFS_ITERATE + ZFS_AC_KERNEL_GET_USER_PAGES ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS - ZFS_AC_KERNEL_VFS_RW_ITERATE - ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS ZFS_AC_KERNEL_VFS_IOV_ITER - ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE - ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_KMAP_LOCAL_PAGE ZFS_AC_KERNEL_FOLLOW_DOWN_ONE @@ -289,36 +207,25 @@ 
AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_RENAME - ZFS_AC_KERNEL_CURRENT_TIME - ZFS_AC_KERNEL_USERNS_CAPABILITIES - ZFS_AC_KERNEL_IN_COMPAT_SYSCALL - ZFS_AC_KERNEL_KTIME ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC ZFS_AC_KERNEL_TOTALHIGH_PAGES - ZFS_AC_KERNEL_KSTRTOUL ZFS_AC_KERNEL_PERCPU - ZFS_AC_KERNEL_CPU_HOTPLUG ZFS_AC_KERNEL_GENERIC_FILLATTR ZFS_AC_KERNEL_MKNOD ZFS_AC_KERNEL_SYMLINK ZFS_AC_KERNEL_BIO_MAX_SEGS - ZFS_AC_KERNEL_SIGNAL_STOP ZFS_AC_KERNEL_SIGINFO ZFS_AC_KERNEL_SYSFS - ZFS_AC_KERNEL_SET_SPECIAL_STATE ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG ZFS_AC_KERNEL_STRLCPY - ZFS_AC_KERNEL_STRSCPY ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT ZFS_AC_KERNEL_ADD_DISK ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC - ZFS_AC_KERNEL_USER_NS_COMMON_INUM ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IDMAP_NO_USERNS ZFS_AC_KERNEL_IATTR_VFSID - ZFS_AC_KERNEL_FILEMAP ZFS_AC_KERNEL_WRITEPAGE_T ZFS_AC_KERNEL_RECLAIMED ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE diff --git a/contrib/debian/not-installed b/contrib/debian/not-installed index ad14776f3b7e..88557f76fcae 100644 --- a/contrib/debian/not-installed +++ b/contrib/debian/not-installed @@ -1,5 +1,4 @@ usr/bin/arc_summary.py -usr/share/zfs/enum-extract.pl usr/share/zfs/zfs-helpers.sh etc/default/zfs etc/init.d diff --git a/contrib/debian/rules.in b/contrib/debian/rules.in index a3a05efacb50..b3b6d9b921d3 100755 --- a/contrib/debian/rules.in +++ b/contrib/debian/rules.in @@ -86,7 +86,6 @@ override_dh_auto_install: @# Install the DKMS source. 
@# We only want the files needed to build the modules install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \ - '$(CURDIR)/scripts/enum-extract.pl' \ '$(CURDIR)/scripts/dkms.postbuild' $(foreach file,$(DKMSFILES),mv '$(CURDIR)/$(NAME)-$(DEB_VERSION_UPSTREAM)/$(file)' '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)' || exit 1;) diff --git a/include/os/freebsd/spl/sys/mutex.h b/include/os/freebsd/spl/sys/mutex.h index 8cfe56c75309..bbff9fe80389 100644 --- a/include/os/freebsd/spl/sys/mutex.h +++ b/include/os/freebsd/spl/sys/mutex.h @@ -70,4 +70,5 @@ typedef enum { #define mutex_exit(lock) sx_xunlock(lock) #define mutex_owned(lock) sx_xlocked(lock) #define mutex_owner(lock) sx_xholder(lock) + #endif /* _OPENSOLARIS_SYS_MUTEX_H_ */ diff --git a/include/os/freebsd/spl/sys/param.h b/include/os/freebsd/spl/sys/param.h index 92724e332d68..96440dce03bb 100644 --- a/include/os/freebsd/spl/sys/param.h +++ b/include/os/freebsd/spl/sys/param.h @@ -33,6 +33,7 @@ #include #include_next #define PAGESIZE PAGE_SIZE +#define PAGESHIFT PAGE_SHIFT #define ptob(x) ((uint64_t)(x) << PAGE_SHIFT) #ifdef _KERNEL #include diff --git a/include/os/freebsd/spl/sys/uio.h b/include/os/freebsd/spl/sys/uio.h index b9d41903ea63..2bd5bdb80d98 100644 --- a/include/os/freebsd/spl/sys/uio.h +++ b/include/os/freebsd/spl/sys/uio.h @@ -34,13 +34,30 @@ #include_next #include #include +#include + +/* + * uio_extflg: extended flags + */ +#define UIO_DIRECT 0x0001 /* Direct I/O request */ typedef struct iovec iovec_t; typedef enum uio_seg zfs_uio_seg_t; typedef enum uio_rw zfs_uio_rw_t; +/* + * This structure is used when doing Direct I/O. 
+ */ +typedef struct { + vm_page_t *pages; + int npages; +} zfs_uio_dio_t; + typedef struct zfs_uio { struct uio *uio; + offset_t uio_soffset; + uint16_t uio_extflg; + zfs_uio_dio_t uio_dio; } zfs_uio_t; #define GET_UIO_STRUCT(u) (u)->uio @@ -52,6 +69,7 @@ typedef struct zfs_uio { #define zfs_uio_iovbase(u, idx) GET_UIO_STRUCT(u)->uio_iov[(idx)].iov_base #define zfs_uio_td(u) GET_UIO_STRUCT(u)->uio_td #define zfs_uio_rw(u) GET_UIO_STRUCT(u)->uio_rw +#define zfs_uio_soffset(u) (u)->uio_soffset #define zfs_uio_fault_disable(u, set) #define zfs_uio_prefaultpages(size, u) (0) @@ -61,6 +79,13 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) zfs_uio_offset(uio) = off; } +static inline void +zfs_uio_setsoffset(zfs_uio_t *uio, offset_t off) +{ + ASSERT3U(zfs_uio_offset(uio), ==, off); + zfs_uio_soffset(uio) = off; +} + static inline void zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { @@ -71,7 +96,11 @@ zfs_uio_advance(zfs_uio_t *uio, ssize_t size) static __inline void zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s) { - GET_UIO_STRUCT(uio) = uio_s; + memset(uio, 0, sizeof (zfs_uio_t)); + if (uio_s != NULL) { + GET_UIO_STRUCT(uio) = uio_s; + zfs_uio_soffset(uio) = uio_s->uio_offset; + } } int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio); diff --git a/include/os/freebsd/zfs/sys/abd_os.h b/include/os/freebsd/zfs/sys/abd_os.h index 57122ee83e8d..be825b3b8a43 100644 --- a/include/os/freebsd/zfs/sys/abd_os.h +++ b/include/os/freebsd/zfs/sys/abd_os.h @@ -26,10 +26,15 @@ #ifndef _ABD_OS_H #define _ABD_OS_H +#include +#include + #ifdef __cplusplus extern "C" { #endif +struct abd; + struct abd_scatter { uint_t abd_offset; void *abd_chunks[1]; /* actually variable-length */ @@ -37,8 +42,14 @@ struct abd_scatter { struct abd_linear { void *abd_buf; +#if defined(_KERNEL) + struct sf_buf *sf; /* for LINEAR_PAGE FreeBSD */ +#endif }; +__attribute__((malloc)) +struct abd *abd_alloc_from_pages(vm_page_t *, unsigned long, uint64_t); + #ifdef __cplusplus } #endif 
diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am index 9100aebb541e..b7bdd892ec1d 100644 --- a/include/os/linux/Makefile.am +++ b/include/os/linux/Makefile.am @@ -8,7 +8,6 @@ kernel_linux_HEADERS = \ %D%/kernel/linux/mm_compat.h \ %D%/kernel/linux/mod_compat.h \ %D%/kernel/linux/page_compat.h \ - %D%/kernel/linux/percpu_compat.h \ %D%/kernel/linux/simd.h \ %D%/kernel/linux/simd_aarch64.h \ %D%/kernel/linux/simd_arm.h \ diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index c2e818b4d4ee..d96708c600ac 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -35,50 +35,19 @@ #include #include /* for SECTOR_* */ #include - -#ifdef HAVE_BLK_MQ #include -#endif - -#ifndef HAVE_BLK_QUEUE_FLAG_SET -static inline void -blk_queue_flag_set(unsigned int flag, struct request_queue *q) -{ - queue_flag_set(flag, q); -} -#endif - -#ifndef HAVE_BLK_QUEUE_FLAG_CLEAR -static inline void -blk_queue_flag_clear(unsigned int flag, struct request_queue *q) -{ - queue_flag_clear(flag, q); -} -#endif /* * 6.11 API * Setting the flush flags directly is no longer possible; flush flags are set * on the queue_limits structure and passed to blk_disk_alloc(). In this case * we remove this function entirely. - * - * 4.7 API, - * The blk_queue_write_cache() interface has replaced blk_queue_flush() - * interface. However, the new interface is GPL-only thus we implement - * our own trivial wrapper when the GPL-only version is detected. - * - * 2.6.36 - 4.6 API, - * The blk_queue_flush() interface has replaced blk_queue_ordered() - * interface. However, while the old interface was available to all the - * new one is GPL-only. Thus if the GPL-only version is detected we - * implement our own trivial helper. 
*/ #if !defined(HAVE_BLK_ALLOC_DISK_2ARG) || \ !defined(HAVE_BLKDEV_QUEUE_LIMITS_FEATURES) static inline void blk_queue_set_write_cache(struct request_queue *q, bool on) { -#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) if (on) { blk_queue_flag_set(QUEUE_FLAG_WC, q); blk_queue_flag_set(QUEUE_FLAG_FUA, q); @@ -86,18 +55,6 @@ blk_queue_set_write_cache(struct request_queue *q, bool on) blk_queue_flag_clear(QUEUE_FLAG_WC, q); blk_queue_flag_clear(QUEUE_FLAG_FUA, q); } -#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) - blk_queue_write_cache(q, on, on); -#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) - if (on) - q->flush_flags |= REQ_FLUSH | REQ_FUA; - else - q->flush_flags &= ~(REQ_FLUSH | REQ_FUA); -#elif defined(HAVE_BLK_QUEUE_FLUSH) - blk_queue_flush(q, on ? (REQ_FLUSH | REQ_FUA) : 0); -#else -#error "Unsupported kernel" -#endif } #endif /* !HAVE_BLK_ALLOC_DISK_2ARG || !HAVE_BLKDEV_QUEUE_LIMITS_FEATURES */ @@ -143,7 +100,6 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) #endif } -#ifdef HAVE_BIO_BVEC_ITER #define BIO_BI_SECTOR(bio) (bio)->bi_iter.bi_sector #define BIO_BI_SIZE(bio) (bio)->bi_iter.bi_size #define BIO_BI_IDX(bio) (bio)->bi_iter.bi_idx @@ -151,15 +107,6 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages) #define bio_for_each_segment4(bv, bvp, b, i) \ bio_for_each_segment((bv), (b), (i)) typedef struct bvec_iter bvec_iterator_t; -#else -#define BIO_BI_SECTOR(bio) (bio)->bi_sector -#define BIO_BI_SIZE(bio) (bio)->bi_size -#define BIO_BI_IDX(bio) (bio)->bi_idx -#define BIO_BI_SKIP(bio) (0) -#define bio_for_each_segment4(bv, bvp, b, i) \ - bio_for_each_segment((bvp), (b), (i)) -typedef int bvec_iterator_t; -#endif static inline void bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev, @@ -200,7 +147,6 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev, #define DISK_NAME_LEN 32 #endif /* DISK_NAME_LEN */ -#ifdef HAVE_BIO_BI_STATUS static inline int 
bi_status_to_errno(blk_status_t status) { @@ -274,42 +220,6 @@ errno_to_bi_status(int error) return (BLK_STS_IOERR); } } -#endif /* HAVE_BIO_BI_STATUS */ - -/* - * 4.3 API change - * The bio_endio() prototype changed slightly. These are helper - * macro's to ensure the prototype and invocation are handled. - */ -#ifdef HAVE_1ARG_BIO_END_IO_T -#ifdef HAVE_BIO_BI_STATUS -#define BIO_END_IO_ERROR(bio) bi_status_to_errno(bio->bi_status) -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x) -#define BIO_END_IO(bio, error) bio_set_bi_status(bio, error) -static inline void -bio_set_bi_status(struct bio *bio, int error) -{ - ASSERT3S(error, <=, 0); - bio->bi_status = errno_to_bi_status(-error); - bio_endio(bio); -} -#else -#define BIO_END_IO_ERROR(bio) (-(bio->bi_error)) -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x) -#define BIO_END_IO(bio, error) bio_set_bi_error(bio, error) -static inline void -bio_set_bi_error(struct bio *bio, int error) -{ - ASSERT3S(error, <=, 0); - bio->bi_error = error; - bio_endio(bio); -} -#endif /* HAVE_BIO_BI_STATUS */ - -#else -#define BIO_END_IO_PROTO(fn, x, z) static void fn(struct bio *x, int z) -#define BIO_END_IO(bio, error) bio_endio(bio, error); -#endif /* HAVE_1ARG_BIO_END_IO_T */ /* * 5.15 MACRO, @@ -337,6 +247,19 @@ zfs_check_disk_status(struct block_device *bdev) #endif } +/* + * 5.17 API change + * + * GENHD_FL_EXT_DEVT flag removed + * GENHD_FL_NO_PART_SCAN renamed GENHD_FL_NO_PART + */ +#ifndef HAVE_GENHD_FL_EXT_DEVT +#define GENHD_FL_EXT_DEVT (0) +#endif +#ifndef HAVE_GENHD_FL_NO_PART +#define GENHD_FL_NO_PART (GENHD_FL_NO_PART_SCAN) +#endif + /* * 4.1 API, * 3.10.0 CentOS 7.x API, @@ -399,9 +322,6 @@ zfs_check_media_change(struct block_device *bdev) * The function was exported for use, prior to this it existed but the * symbol was not exported. * - * 4.4.0-6.21 API change for Ubuntu - * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions. 
- * * 5.11 API change * Changed to take a dev_t argument which is set on success and return a * non-zero error code on failure. @@ -419,15 +339,6 @@ vdev_lookup_bdev(const char *path, dev_t *dev) *dev = bdev->bd_dev; bdput(bdev); - return (0); -#elif defined(HAVE_MODE_LOOKUP_BDEV) - struct block_device *bdev = lookup_bdev(path, FMODE_READ); - if (IS_ERR(bdev)) - return (PTR_ERR(bdev)); - - *dev = bdev->bd_dev; - bdput(bdev); - return (0); #else #error "Unsupported kernel" @@ -447,56 +358,24 @@ vdev_lookup_bdev(const char *path, dev_t *dev) static inline void bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags) { -#if defined(HAVE_BIO_BI_OPF) bio->bi_opf = rw | flags; -#else - bio->bi_rw |= rw | flags; -#endif /* HAVE_BIO_BI_OPF */ } #endif /* * bio_set_flush - Set the appropriate flags in a bio to guarantee * data are on non-volatile media on completion. - * - * 2.6.37 - 4.8 API, - * Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a - * replacement for WRITE_BARRIER to allow expressing richer semantics - * to the block layer. It's up to the block layer to implement the - * semantics correctly. Use the WRITE_FLUSH_FUA flag combination. - * - * 4.8 - 4.9 API, - * REQ_FLUSH was renamed to REQ_PREFLUSH. For consistency with previous - * OpenZFS releases, prefer the WRITE_FLUSH_FUA flag set if it's available. - * - * 4.10 API, - * The read/write flags and their modifiers, including WRITE_FLUSH, - * WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in - * torvalds/linux@70fd7614 and replaced by direct flag modification - * of the REQ_ flags in bio->bi_opf. Use REQ_PREFLUSH. */ static inline void bio_set_flush(struct bio *bio) { -#if defined(HAVE_REQ_PREFLUSH) /* >= 4.10 */ bio_set_op_attrs(bio, 0, REQ_PREFLUSH | REQ_OP_WRITE); -#elif defined(WRITE_FLUSH_FUA) /* >= 2.6.37 and <= 4.9 */ - bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA); -#else -#error "Allowing the build will cause bio_set_flush requests to be ignored." 
-#endif } /* * 4.8 API, * REQ_OP_FLUSH * - * 4.8-rc0 - 4.8-rc1, - * REQ_PREFLUSH - * - * 2.6.36 - 4.7 API, - * REQ_FLUSH - * * in all cases but may have a performance impact for some kernels. It * has the advantage of minimizing kernel specific changes in the zvol code. * @@ -504,77 +383,40 @@ bio_set_flush(struct bio *bio) static inline boolean_t bio_is_flush(struct bio *bio) { -#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF) - return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH)); -#elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF) - return (bio->bi_opf & REQ_PREFLUSH); -#elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF) - return (bio->bi_rw & REQ_PREFLUSH); -#elif defined(HAVE_REQ_FLUSH) - return (bio->bi_rw & REQ_FLUSH); -#else -#error "Unsupported kernel" -#endif + return (bio_op(bio) == REQ_OP_FLUSH); } /* * 4.8 API, * REQ_FUA flag moved to bio->bi_opf - * - * 2.6.x - 4.7 API, - * REQ_FUA */ static inline boolean_t bio_is_fua(struct bio *bio) { -#if defined(HAVE_BIO_BI_OPF) return (bio->bi_opf & REQ_FUA); -#elif defined(REQ_FUA) - return (bio->bi_rw & REQ_FUA); -#else -#error "Allowing the build will cause fua requests to be ignored." -#endif } /* * 4.8 API, * REQ_OP_DISCARD * - * 2.6.36 - 4.7 API, - * REQ_DISCARD - * * In all cases the normal I/O path is used for discards. The only * difference is how the kernel tags individual I/Os as discards. 
*/ static inline boolean_t bio_is_discard(struct bio *bio) { -#if defined(HAVE_REQ_OP_DISCARD) return (bio_op(bio) == REQ_OP_DISCARD); -#elif defined(HAVE_REQ_DISCARD) - return (bio->bi_rw & REQ_DISCARD); -#else -#error "Unsupported kernel" -#endif } /* * 4.8 API, * REQ_OP_SECURE_ERASE - * - * 2.6.36 - 4.7 API, - * REQ_SECURE */ static inline boolean_t bio_is_secure_erase(struct bio *bio) { -#if defined(HAVE_REQ_OP_SECURE_ERASE) return (bio_op(bio) == REQ_OP_SECURE_ERASE); -#elif defined(REQ_SECURE) - return (bio->bi_rw & REQ_SECURE); -#else - return (0); -#endif } /* @@ -615,9 +457,6 @@ bdev_discard_supported(struct block_device *bdev) * * 4.8 API, * blk_queue_secure_erase() - * - * 2.6.36 - 4.7 API, - * blk_queue_secdiscard() */ static inline boolean_t bdev_secure_discard_supported(struct block_device *bdev) @@ -626,8 +465,6 @@ bdev_secure_discard_supported(struct block_device *bdev) return (!!bdev_max_secure_erase_sectors(bdev)); #elif defined(HAVE_BLK_QUEUE_SECURE_ERASE) return (!!blk_queue_secure_erase(bdev_get_queue(bdev))); -#elif defined(HAVE_BLK_QUEUE_SECDISCARD) - return (!!blk_queue_secdiscard(bdev_get_queue(bdev))); #else #error "Unsupported kernel" #endif @@ -657,10 +494,6 @@ blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)), return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio))); #elif defined(HAVE_BIO_IO_ACCT) return (bio_start_io_acct(bio)); -#elif defined(HAVE_GENERIC_IO_ACCT_3ARG) - unsigned long start_time = jiffies; - generic_start_io_acct(rw, bio_sectors(bio), &disk->part0); - return (start_time); #elif defined(HAVE_GENERIC_IO_ACCT_4ARG) unsigned long start_time = jiffies; generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0); @@ -685,8 +518,6 @@ blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)), disk_end_io_acct(disk, bio_op(bio), start_time); #elif defined(HAVE_BIO_IO_ACCT) bio_end_io_acct(bio, start_time); -#elif defined(HAVE_GENERIC_IO_ACCT_3ARG) - generic_end_io_acct(rw, 
&disk->part0, start_time); #elif defined(HAVE_GENERIC_IO_ACCT_4ARG) generic_end_io_acct(q, rw, &disk->part0, start_time); #endif @@ -718,7 +549,6 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id) static inline int io_data_dir(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) { if (op_is_write(req_op(rq))) { return (WRITE); @@ -726,57 +556,38 @@ io_data_dir(struct bio *bio, struct request *rq) return (READ); } } -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_data_dir(bio)); } static inline int io_is_flush(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_FLUSH); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_flush(bio)); } static inline int io_is_discard(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_DISCARD); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_discard(bio)); } static inline int io_is_secure_erase(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (req_op(rq) == REQ_OP_SECURE_ERASE); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_secure_erase(bio)); } static inline int io_is_fua(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (rq->cmd_flags & REQ_FUA); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_is_fua(bio)); } @@ -784,36 +595,24 @@ io_is_fua(struct bio *bio, struct request *rq) static inline uint64_t io_offset(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (blk_rq_pos(rq) << 9); -#else - ASSERT3P(rq, ==, NULL); -#endif return (BIO_BI_SECTOR(bio) << 9); } static inline uint64_t io_size(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) return (blk_rq_bytes(rq)); -#else - ASSERT3P(rq, ==, NULL); -#endif return (BIO_BI_SIZE(bio)); } static inline int io_has_data(struct bio *bio, struct request *rq) { -#ifdef HAVE_BLK_MQ if (rq != NULL) 
return (bio_has_data(rq->bio)); -#else - ASSERT3P(rq, ==, NULL); -#endif return (bio_has_data(bio)); } #endif /* _ZFS_BLKDEV_H */ diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index ab1711b99f3f..de533a5fd28b 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -31,13 +31,7 @@ #define dname(dentry) ((char *)((dentry)->d_name.name)) #define dlen(dentry) ((int)((dentry)->d_name.len)) -#ifndef HAVE_D_MAKE_ROOT -#define d_make_root(inode) d_alloc_root(inode) -#endif /* HAVE_D_MAKE_ROOT */ - -#ifdef HAVE_DENTRY_D_U_ALIASES #define d_alias d_u.d_alias -#endif /* * Starting from Linux 5.13, flush_dcache_page() becomes an inline function diff --git a/include/os/linux/kernel/linux/kmap_compat.h b/include/os/linux/kernel/linux/kmap_compat.h index fb59c5f0267c..432c0e9913e4 100644 --- a/include/os/linux/kernel/linux/kmap_compat.h +++ b/include/os/linux/kernel/linux/kmap_compat.h @@ -38,6 +38,8 @@ #define zfs_kmap_local(page) kmap_atomic(page) #define zfs_kunmap_local(addr) kunmap_atomic(addr) #endif +#define zfs_kmap(page) kmap(page) +#define zfs_kunmap(page) kunmap(page) /* 5.0 API change - no more 'type' argument for access_ok() */ #ifdef HAVE_ACCESS_OK_TYPE @@ -46,4 +48,49 @@ #define zfs_access_ok(type, addr, size) access_ok(addr, size) #endif +/* + * read returning FOLL_WRITE is due to the fact that we are stating + * that the kernel will have write access to the user pages. So, when + * a Direct I/O read request is issued, the kernel must write to the user + * pages. + * + * get_user_pages_unlocked was not available until 4.0, so we also check + * for get_user_pages on older kernels. + */ +/* 4.9 API change - force and read flags are passed as gup flags */ +#if defined(HAVE_GET_USER_PAGES_UNLOCKED_GUP_FLAGS) +#define zfs_get_user_pages(addr, numpages, read, pages) \ + get_user_pages_unlocked(addr, numpages, pages, read ? 
FOLL_WRITE : 0) + +/* 4.8 API change - no longer takes struct task_struct as an argument */ +#elif defined(HAVE_GET_USER_PAGES_UNLOCKED_WRITE_FLAG) +#define zfs_get_user_pages(addr, numpages, read, pages) \ + get_user_pages_unlocked(addr, numpages, read, 0, pages) + +/* 4.0-4.3, 4.5-4.7 API */ +#elif defined(HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT) +#define zfs_get_user_pages(addr, numpages, read, pages) \ + get_user_pages_unlocked(current, current->mm, addr, numpages, read, 0, \ + pages) + +/* 4.4 API */ +#elif defined(HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT_GUP_FLAGS) +#define zfs_get_user_pages(addr, numpages, read, pages) \ + get_user_pages_unlocked(current, current->mm, addr, numpages, pages, \ + read ? FOLL_WRITE : 0) + +/* Using get_user_pages if kernel is < 4.0 */ +#elif defined(HAVE_GET_USER_PAGES_TASK_STRUCT) +#define zfs_get_user_pages(addr, numpages, read, pages) \ + get_user_pages(current, current->mm, addr, numpages, read, 0, pages, \ + NULL) +#else +/* + * This case is unreachable. We must be able to use either + * get_user_pages_unlocked() or get_user_pages() to map user pages into + * the kernel. + */ +#error "Unknown Direct I/O interface" +#endif + #endif /* _ZFS_KMAP_H */ diff --git a/include/os/linux/kernel/linux/page_compat.h b/include/os/linux/kernel/linux/page_compat.h index bd6cb398b0c0..963b96ba6351 100644 --- a/include/os/linux/kernel/linux/page_compat.h +++ b/include/os/linux/kernel/linux/page_compat.h @@ -1,67 +1,11 @@ #ifndef _ZFS_PAGE_COMPAT_H #define _ZFS_PAGE_COMPAT_H -/* - * We have various enum members moving between two separate enum types, - * and accessed by different functions at various times. Centralise the - * insanity. 
- * - * < v4.8: all enums in zone_stat_item, via global_page_state() - * v4.8: some enums moved to node_stat_item, global_node_page_state() introduced - * v4.13: some enums moved from zone_stat_item to node_state_item - * v4.14: global_page_state() rename to global_zone_page_state() - * - * The defines used here are created by config/kernel-global_page_state.m4 - */ - /* * Create our own accessor functions to follow the Linux API changes */ -#if defined(ZFS_GLOBAL_ZONE_PAGE_STATE) - -/* global_zone_page_state() introduced */ -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES) -#define nr_file_pages() global_node_page_state(NR_FILE_PAGES) -#else -#define nr_file_pages() global_zone_page_state(NR_FILE_PAGES) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON) -#define nr_inactive_anon_pages() global_node_page_state(NR_INACTIVE_ANON) -#else -#define nr_inactive_anon_pages() global_zone_page_state(NR_INACTIVE_ANON) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE) -#define nr_inactive_file_pages() global_node_page_state(NR_INACTIVE_FILE) -#else -#define nr_inactive_file_pages() global_zone_page_state(NR_INACTIVE_FILE) -#endif - -#elif defined(ZFS_GLOBAL_NODE_PAGE_STATE) - -/* global_node_page_state() introduced */ -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES) #define nr_file_pages() global_node_page_state(NR_FILE_PAGES) -#else -#define nr_file_pages() global_page_state(NR_FILE_PAGES) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON) #define nr_inactive_anon_pages() global_node_page_state(NR_INACTIVE_ANON) -#else -#define nr_inactive_anon_pages() global_page_state(NR_INACTIVE_ANON) -#endif -#if defined(ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE) #define nr_inactive_file_pages() global_node_page_state(NR_INACTIVE_FILE) -#else -#define nr_inactive_file_pages() global_page_state(NR_INACTIVE_FILE) -#endif - -#else - -/* global_page_state() only */ -#define nr_file_pages() global_page_state(NR_FILE_PAGES) -#define 
nr_inactive_anon_pages() global_page_state(NR_INACTIVE_ANON) -#define nr_inactive_file_pages() global_page_state(NR_INACTIVE_FILE) - -#endif /* ZFS_GLOBAL_ZONE_PAGE_STATE */ #endif /* _ZFS_PAGE_COMPAT_H */ diff --git a/include/os/linux/kernel/linux/percpu_compat.h b/include/os/linux/kernel/linux/percpu_compat.h deleted file mode 100644 index bf3a5a01c27e..000000000000 --- a/include/os/linux/kernel/linux/percpu_compat.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or https://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2020 by Delphix. All rights reserved. - */ - -#ifndef _ZFS_PERCPU_H -#define _ZFS_PERCPU_H - -#include - -/* - * 3.18 API change, - * percpu_counter_init() now must be passed a gfp mask which will be - * used for the dynamic allocation of the actual counter. 
- */ -#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP -#define percpu_counter_init_common(counter, n, gfp) \ - percpu_counter_init(counter, n, gfp) -#else -#define percpu_counter_init_common(counter, n, gfp) \ - percpu_counter_init(counter, n) -#endif - -#endif /* _ZFS_PERCPU_H */ diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index aea8bd5ed22c..075b9e111b10 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -32,82 +32,6 @@ #include #include -/* - * 2.6.34 - 3.19, bdi_setup_and_register() takes 3 arguments. - * 4.0 - 4.11, bdi_setup_and_register() takes 2 arguments. - * 4.12 - x.y, super_setup_bdi_name() new interface. - */ -#if defined(HAVE_SUPER_SETUP_BDI_NAME) -extern atomic_long_t zfs_bdi_seq; - -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - return super_setup_bdi_name(sb, "%.28s-%ld", name, - atomic_long_inc_return(&zfs_bdi_seq)); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ -} -#elif defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - struct backing_dev_info *bdi; - int error; - - bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP); - error = bdi_setup_and_register(bdi, name); - if (error) { - kmem_free(bdi, sizeof (struct backing_dev_info)); - return (error); - } - - sb->s_bdi = bdi; - - return (0); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ - struct backing_dev_info *bdi = sb->s_bdi; - - bdi_destroy(bdi); - kmem_free(bdi, sizeof (struct backing_dev_info)); - sb->s_bdi = NULL; -} -#elif defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER) -static inline int -zpl_bdi_setup(struct super_block *sb, char *name) -{ - struct backing_dev_info *bdi; - int error; - - bdi = kmem_zalloc(sizeof (struct backing_dev_info), KM_SLEEP); - error = bdi_setup_and_register(bdi, name, BDI_CAP_MAP_COPY); - if (error) { - 
kmem_free(sb->s_bdi, sizeof (struct backing_dev_info)); - return (error); - } - - sb->s_bdi = bdi; - - return (0); -} -static inline void -zpl_bdi_destroy(struct super_block *sb) -{ - struct backing_dev_info *bdi = sb->s_bdi; - - bdi_destroy(bdi); - kmem_free(bdi, sizeof (struct backing_dev_info)); - sb->s_bdi = NULL; -} -#else -#error "Unsupported kernel" -#endif - /* * 4.14 adds SB_* flag definitions, define them to MS_* equivalents * if not set. @@ -136,17 +60,7 @@ zpl_bdi_destroy(struct super_block *sb) #define SB_NOATIME MS_NOATIME #endif -/* - * 3.5 API change, - * The clear_inode() function replaces end_writeback() and introduces an - * ordering change regarding when the inode_sync_wait() occurs. See the - * configure check in config/kernel-clear-inode.m4 for full details. - */ -#if defined(HAVE_EVICT_INODE) && !defined(HAVE_CLEAR_INODE) -#define clear_inode(ip) end_writeback(ip) -#endif /* HAVE_EVICT_INODE && !HAVE_CLEAR_INODE */ - -#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(HAVE_LSEEK_EXECUTE) +#if defined(SEEK_HOLE) && defined(SEEK_DATA) static inline loff_t lseek_execute( struct file *filp, @@ -169,7 +83,7 @@ lseek_execute( return (offset); } -#endif /* SEEK_HOLE && SEEK_DATA && !HAVE_LSEEK_EXECUTE */ +#endif /* SEEK_HOLE && SEEK_DATA */ #if defined(CONFIG_FS_POSIX_ACL) /* @@ -184,9 +98,6 @@ lseek_execute( #include -#if defined(HAVE_POSIX_ACL_RELEASE) && !defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY) -#define zpl_posix_acl_release(arg) posix_acl_release(arg) -#else void zpl_posix_acl_release_impl(struct posix_acl *); static inline void @@ -194,106 +105,11 @@ zpl_posix_acl_release(struct posix_acl *acl) { if ((acl == NULL) || (acl == ACL_NOT_CACHED)) return; -#ifdef HAVE_ACL_REFCOUNT if (refcount_dec_and_test(&acl->a_refcount)) zpl_posix_acl_release_impl(acl); -#else - if (atomic_dec_and_test(&acl->a_refcount)) - zpl_posix_acl_release_impl(acl); -#endif -} -#endif /* HAVE_POSIX_ACL_RELEASE */ - -#ifdef HAVE_SET_CACHED_ACL_USABLE -#define 
zpl_set_cached_acl(ip, ty, n) set_cached_acl(ip, ty, n) -#define zpl_forget_cached_acl(ip, ty) forget_cached_acl(ip, ty) -#else -static inline void -zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer) -{ - struct posix_acl *older = NULL; - - spin_lock(&ip->i_lock); - - if ((newer != ACL_NOT_CACHED) && (newer != NULL)) - posix_acl_dup(newer); - - switch (type) { - case ACL_TYPE_ACCESS: - older = ip->i_acl; - rcu_assign_pointer(ip->i_acl, newer); - break; - case ACL_TYPE_DEFAULT: - older = ip->i_default_acl; - rcu_assign_pointer(ip->i_default_acl, newer); - break; - } - - spin_unlock(&ip->i_lock); - - zpl_posix_acl_release(older); } - -static inline void -zpl_forget_cached_acl(struct inode *ip, int type) -{ - zpl_set_cached_acl(ip, type, (struct posix_acl *)ACL_NOT_CACHED); -} -#endif /* HAVE_SET_CACHED_ACL_USABLE */ - -/* - * 3.1 API change, - * posix_acl_chmod() was added as the preferred interface. - * - * 3.14 API change, - * posix_acl_chmod() was changed to __posix_acl_chmod() - */ -#ifndef HAVE___POSIX_ACL_CHMOD -#ifdef HAVE_POSIX_ACL_CHMOD -#define __posix_acl_chmod(acl, gfp, mode) posix_acl_chmod(acl, gfp, mode) -#define __posix_acl_create(acl, gfp, mode) posix_acl_create(acl, gfp, mode) -#else -#error "Unsupported kernel" -#endif /* HAVE_POSIX_ACL_CHMOD */ -#endif /* HAVE___POSIX_ACL_CHMOD */ - -/* - * 4.8 API change, - * posix_acl_valid() now must be passed a namespace, the namespace from - * from super block associated with the given inode is used for this purpose. 
- */ -#ifdef HAVE_POSIX_ACL_VALID_WITH_NS -#define zpl_posix_acl_valid(ip, acl) posix_acl_valid(ip->i_sb->s_user_ns, acl) -#else -#define zpl_posix_acl_valid(ip, acl) posix_acl_valid(acl) -#endif - #endif /* CONFIG_FS_POSIX_ACL */ -/* - * 3.19 API change - * struct access f->f_dentry->d_inode was replaced by accessor function - * file_inode(f) - */ -#ifndef HAVE_FILE_INODE -static inline struct inode *file_inode(const struct file *f) -{ - return (f->f_dentry->d_inode); -} -#endif /* HAVE_FILE_INODE */ - -/* - * 4.1 API change - * struct access file->f_path.dentry was replaced by accessor function - * file_dentry(f) - */ -#ifndef HAVE_FILE_DENTRY -static inline struct dentry *file_dentry(const struct file *f) -{ - return (f->f_path.dentry); -} -#endif /* HAVE_FILE_DENTRY */ - static inline uid_t zfs_uid_read_impl(struct inode *ip) { return (from_kuid(kcred->user_ns, ip->i_uid)); @@ -371,16 +187,7 @@ setattr_prepare(struct dentry *dentry, struct iattr *ia) * 4.11 takes struct path *, < 4.11 takes vfsmount * */ -#ifdef HAVE_VFSMOUNT_IOPS_GETATTR -#define ZPL_GETATTR_WRAPPER(func) \ -static int \ -func(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) \ -{ \ - struct path path = { .mnt = mnt, .dentry = dentry }; \ - return func##_impl(&path, stat, STATX_BASIC_STATS, \ - AT_STATX_SYNC_AS_STAT); \ -} -#elif defined(HAVE_PATH_IOPS_GETATTR) +#if defined(HAVE_PATH_IOPS_GETATTR) #define ZPL_GETATTR_WRAPPER(func) \ static int \ func(const struct path *path, struct kstat *stat, u32 request_mask, \ @@ -410,32 +217,6 @@ func(struct mnt_idmap *user_ns, const struct path *path, \ #error #endif -/* - * 4.9 API change - * Preferred interface to get the current FS time. - */ -#if !defined(HAVE_CURRENT_TIME) -static inline struct timespec -current_time(struct inode *ip) -{ - return (timespec_trunc(current_kernel_time(), ip->i_sb->s_time_gran)); -} -#endif - -/* - * 4.16 API change - * Added iversion interface for managing inode version field. 
- */ -#ifdef HAVE_INODE_SET_IVERSION -#include -#else -static inline void -inode_set_iversion(struct inode *ip, u64 val) -{ - ip->i_version = val; -} -#endif - /* * Returns true when called in the context of a 32-bit system call. */ @@ -443,11 +224,7 @@ static inline int zpl_is_32bit_api(void) { #ifdef CONFIG_COMPAT -#ifdef HAVE_IN_COMPAT_SYSCALL return (in_compat_syscall()); -#else - return (is_compat_task()); -#endif #else return (BITS_PER_LONG == 32); #endif diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h index bcc7289ad857..f7e62da62007 100644 --- a/include/os/linux/kernel/linux/xattr_compat.h +++ b/include/os/linux/kernel/linux/xattr_compat.h @@ -40,97 +40,34 @@ typedef const struct xattr_handler xattr_handler_t; /* * 4.5 API change, */ -#if defined(HAVE_XATTR_LIST_SIMPLE) #define ZPL_XATTR_LIST_WRAPPER(fn) \ static bool \ fn(struct dentry *dentry) \ { \ return (!!__ ## fn(dentry->d_inode, NULL, 0, NULL, 0)); \ } -/* - * 4.4 API change, - */ -#elif defined(HAVE_XATTR_LIST_DENTRY) -#define ZPL_XATTR_LIST_WRAPPER(fn) \ -static size_t \ -fn(struct dentry *dentry, char *list, size_t list_size, \ - const char *name, size_t name_len, int type) \ -{ \ - return (__ ## fn(dentry->d_inode, \ - list, list_size, name, name_len)); \ -} -/* - * 2.6.33 API change, - */ -#elif defined(HAVE_XATTR_LIST_HANDLER) -#define ZPL_XATTR_LIST_WRAPPER(fn) \ -static size_t \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - char *list, size_t list_size, const char *name, size_t name_len) \ -{ \ - return (__ ## fn(dentry->d_inode, \ - list, list_size, name, name_len)); \ -} -#else -#error "Unsupported kernel" -#endif +#ifdef HAVE_XATTR_GET_DENTRY_INODE_FLAGS /* - * 4.7 API change, - * The xattr_handler->get() callback was changed to take a both dentry and - * inode, because the dentry might not be attached to an inode yet. + * Android API change, + * The xattr_handler->get() callback also takes a flags arg. 
*/ -#if defined(HAVE_XATTR_GET_DENTRY_INODE) #define ZPL_XATTR_GET_WRAPPER(fn) \ static int \ fn(const struct xattr_handler *handler, struct dentry *dentry, \ - struct inode *inode, const char *name, void *buffer, size_t size) \ + struct inode *inode, const char *name, void *buffer, \ + size_t size, int flags) \ { \ return (__ ## fn(inode, name, buffer, size)); \ } -/* - * 4.4 API change, - * The xattr_handler->get() callback was changed to take a xattr_handler, - * and handler_flags argument was removed and should be accessed by - * handler->flags. - */ -#elif defined(HAVE_XATTR_GET_HANDLER) -#define ZPL_XATTR_GET_WRAPPER(fn) \ -static int \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - const char *name, void *buffer, size_t size) \ -{ \ - return (__ ## fn(dentry->d_inode, name, buffer, size)); \ -} -/* - * 2.6.33 API change, - * The xattr_handler->get() callback was changed to take a dentry - * instead of an inode, and a handler_flags argument was added. - */ -#elif defined(HAVE_XATTR_GET_DENTRY) -#define ZPL_XATTR_GET_WRAPPER(fn) \ -static int \ -fn(struct dentry *dentry, const char *name, void *buffer, size_t size, \ - int unused_handler_flags) \ -{ \ - return (__ ## fn(dentry->d_inode, name, buffer, size)); \ -} -/* - * Android API change, - * The xattr_handler->get() callback was changed to take a dentry and inode - * and flags, because the dentry might not be attached to an inode yet. 
- */ -#elif defined(HAVE_XATTR_GET_DENTRY_INODE_FLAGS) +#else #define ZPL_XATTR_GET_WRAPPER(fn) \ static int \ fn(const struct xattr_handler *handler, struct dentry *dentry, \ - struct inode *inode, const char *name, void *buffer, \ - size_t size, int flags) \ + struct inode *inode, const char *name, void *buffer, size_t size) \ { \ return (__ ## fn(inode, name, buffer, size)); \ } -#else -#error "Unsupported kernel" #endif /* @@ -177,35 +114,6 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \ { \ return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\ } -/* - * 4.4 API change, - * The xattr_handler->set() callback was changed to take a xattr_handler, - * and handler_flags argument was removed and should be accessed by - * handler->flags. - */ -#elif defined(HAVE_XATTR_SET_HANDLER) -#define ZPL_XATTR_SET_WRAPPER(fn) \ -static int \ -fn(const struct xattr_handler *handler, struct dentry *dentry, \ - const char *name, const void *buffer, size_t size, int flags) \ -{ \ - return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \ - buffer, size, flags)); \ -} -/* - * 2.6.33 API change, - * The xattr_handler->set() callback was changed to take a dentry - * instead of an inode, and a handler_flags argument was added. 
- */ -#elif defined(HAVE_XATTR_SET_DENTRY) -#define ZPL_XATTR_SET_WRAPPER(fn) \ -static int \ -fn(struct dentry *dentry, const char *name, const void *buffer, \ - size_t size, int flags, int unused_handler_flags) \ -{ \ - return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \ - buffer, size, flags)); \ -} #else #error "Unsupported kernel" #endif diff --git a/include/os/linux/spl/sys/condvar.h b/include/os/linux/spl/sys/condvar.h index ef405763ca56..cb94ae89c866 100644 --- a/include/os/linux/spl/sys/condvar.h +++ b/include/os/linux/spl/sys/condvar.h @@ -66,8 +66,8 @@ typedef struct { int cv_magic; - spl_wait_queue_head_t cv_event; - spl_wait_queue_head_t cv_destroy; + wait_queue_head_t cv_event; + wait_queue_head_t cv_destroy; atomic_t cv_refs; atomic_t cv_waiters; kmutex_t *cv_mutex; diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h index c19c3c0719ff..1c5120c24371 100644 --- a/include/os/linux/spl/sys/cred.h +++ b/include/os/linux/spl/sys/cred.h @@ -63,11 +63,7 @@ zfs_is_init_userns(struct user_namespace *user_ns) static inline struct user_namespace *zfs_i_user_ns(struct inode *inode) { -#ifdef HAVE_SUPER_USER_NS return (inode->i_sb->s_user_ns); -#else - return (kcred->user_ns); -#endif } static inline boolean_t zfs_no_idmapping(struct user_namespace *mnt_userns, diff --git a/include/os/linux/spl/sys/file.h b/include/os/linux/spl/sys/file.h index e0bbd6d98cba..38d19d8c68b0 100644 --- a/include/os/linux/spl/sys/file.h +++ b/include/os/linux/spl/sys/file.h @@ -28,7 +28,6 @@ #define FKIOCTL 0x80000000 #define ED_CASE_CONFLICT 0x10 -#ifdef HAVE_INODE_LOCK_SHARED #define spl_inode_lock(ip) inode_lock(ip) #define spl_inode_unlock(ip) inode_unlock(ip) #define spl_inode_lock_shared(ip) inode_lock_shared(ip) @@ -37,15 +36,5 @@ #define spl_inode_trylock_shared(ip) inode_trylock_shared(ip) #define spl_inode_is_locked(ip) inode_is_locked(ip) #define spl_inode_lock_nested(ip, s) inode_lock_nested(ip, s) -#else -#define spl_inode_lock(ip) 
mutex_lock(&(ip)->i_mutex) -#define spl_inode_unlock(ip) mutex_unlock(&(ip)->i_mutex) -#define spl_inode_lock_shared(ip) mutex_lock(&(ip)->i_mutex) -#define spl_inode_unlock_shared(ip) mutex_unlock(&(ip)->i_mutex) -#define spl_inode_trylock(ip) mutex_trylock(&(ip)->i_mutex) -#define spl_inode_trylock_shared(ip) mutex_trylock(&(ip)->i_mutex) -#define spl_inode_is_locked(ip) mutex_is_locked(&(ip)->i_mutex) -#define spl_inode_lock_nested(ip, s) mutex_lock_nested(&(ip)->i_mutex, s) -#endif #endif /* SPL_FILE_H */ diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h index 2b4f120e6427..cfdb0ba750f7 100644 --- a/include/os/linux/spl/sys/kmem_cache.h +++ b/include/os/linux/spl/sys/kmem_cache.h @@ -160,7 +160,7 @@ typedef struct spl_kmem_cache { struct list_head skc_partial_list; /* Partially alloc'ed */ struct rb_root skc_emergency_tree; /* Min sized objects */ spinlock_t skc_lock; /* Cache lock */ - spl_wait_queue_head_t skc_waitq; /* Allocation waiters */ + wait_queue_head_t skc_waitq; /* Allocation waiters */ uint64_t skc_slab_fail; /* Slab alloc failures */ uint64_t skc_slab_create; /* Slab creates */ uint64_t skc_slab_destroy; /* Slab destroys */ diff --git a/include/os/linux/spl/sys/signal.h b/include/os/linux/spl/sys/signal.h index cb4b33261647..473a1be14c34 100644 --- a/include/os/linux/spl/sys/signal.h +++ b/include/os/linux/spl/sys/signal.h @@ -25,10 +25,7 @@ #define _SPL_SIGNAL_H #include - -#ifdef HAVE_SCHED_SIGNAL_HEADER #include -#endif extern int issig(void); diff --git a/include/os/linux/spl/sys/string.h b/include/os/linux/spl/sys/string.h index f44bf23eb326..20bde1f0898a 100644 --- a/include/os/linux/spl/sys/string.h +++ b/include/os/linux/spl/sys/string.h @@ -25,10 +25,7 @@ #include -/* Fallbacks for kernel missing strlcpy */ #ifndef HAVE_KERNEL_STRLCPY - -#if defined(HAVE_KERNEL_STRSCPY) /* * strscpy is strlcpy, but returns an error on truncation. 
strlcpy is defined * to return strlen(src), so detect error and override it. @@ -41,10 +38,6 @@ strlcpy(char *dest, const char *src, size_t size) return ((size_t)ret); return (strlen(src)); } -#else -#error "no strlcpy fallback available" -#endif - #endif /* HAVE_KERNEL_STRLCPY */ #endif /* _SPL_STRING_H */ diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index 8051de36ba82..f63a397f293d 100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -131,8 +131,8 @@ typedef struct taskq { struct list_head tq_prio_list; /* priority taskq_ent_t's */ struct list_head tq_delay_list; /* delayed taskq_ent_t's */ struct list_head tq_taskqs; /* all taskq_t's */ - spl_wait_queue_head_t tq_work_waitq; /* new work waitq */ - spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */ + wait_queue_head_t tq_work_waitq; /* new work waitq */ + wait_queue_head_t tq_wait_waitq; /* wait waitq */ tq_lock_role_t tq_lock_class; /* class when taking tq_lock */ /* list node for the cpu hotplug callback */ struct hlist_node tq_hp_cb_node; @@ -144,7 +144,7 @@ typedef struct taskq { typedef struct taskq_ent { spinlock_t tqent_lock; - spl_wait_queue_head_t tqent_waitq; + wait_queue_head_t tqent_waitq; struct timer_list tqent_timer; struct list_head tqent_list; taskqid_t tqent_id; diff --git a/include/os/linux/spl/sys/thread.h b/include/os/linux/spl/sys/thread.h index bc88ff4efb67..4f7f659e528d 100644 --- a/include/os/linux/spl/sys/thread.h +++ b/include/os/linux/spl/sys/thread.h @@ -80,11 +80,4 @@ typedef kernel_siginfo_t spl_kernel_siginfo_t; typedef siginfo_t spl_kernel_siginfo_t; #endif -#ifdef HAVE_SET_SPECIAL_STATE -#define spl_set_special_state(x) set_special_state((x)) -#else -#define spl_set_special_state(x) __set_current_state((x)) -#endif - - #endif /* _SPL_THREAD_H */ diff --git a/include/os/linux/spl/sys/time.h b/include/os/linux/spl/sys/time.h index fec85f8b8d13..036948d87aae 100644 --- a/include/os/linux/spl/sys/time.h +++ 
b/include/os/linux/spl/sys/time.h @@ -59,11 +59,7 @@ typedef struct timespec timespec_t; #define TIMESPEC_OVERFLOW(ts) \ ((ts)->tv_sec < TIME_MIN || (ts)->tv_sec > TIME_MAX) -#if defined(HAVE_INODE_TIMESPEC64_TIMES) typedef struct timespec64 inode_timespec_t; -#else -typedef struct timespec inode_timespec_t; -#endif /* Include for Lustre compatibility */ #define timestruc_t inode_timespec_t @@ -71,46 +67,22 @@ typedef struct timespec inode_timespec_t; static inline void gethrestime(inode_timespec_t *ts) { -#if defined(HAVE_INODE_TIMESPEC64_TIMES) - -#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64) ktime_get_coarse_real_ts64(ts); -#else - *ts = current_kernel_time64(); -#endif /* HAVE_KTIME_GET_COARSE_REAL_TS64 */ - -#else - *ts = current_kernel_time(); -#endif } static inline uint64_t gethrestime_sec(void) { -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#if defined(HAVE_KTIME_GET_COARSE_REAL_TS64) inode_timespec_t ts; ktime_get_coarse_real_ts64(&ts); -#else - inode_timespec_t ts = current_kernel_time64(); -#endif /* HAVE_KTIME_GET_COARSE_REAL_TS64 */ - -#else - inode_timespec_t ts = current_kernel_time(); -#endif return (ts.tv_sec); } static inline hrtime_t gethrtime(void) { -#if defined(HAVE_KTIME_GET_RAW_TS64) struct timespec64 ts; ktime_get_raw_ts64(&ts); -#else - struct timespec ts; - getrawmonotonic(&ts); -#endif return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec); } diff --git a/include/os/linux/spl/sys/timer.h b/include/os/linux/spl/sys/timer.h index 02c3c7893477..61a1fa6aeffb 100644 --- a/include/os/linux/spl/sys/timer.h +++ b/include/os/linux/spl/sys/timer.h @@ -62,24 +62,4 @@ container_of(timer, typeof(*var), timer_field) #endif -#ifdef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST -typedef struct timer_list *spl_timer_list_t; -#else -typedef unsigned long spl_timer_list_t; -#endif - -#ifndef HAVE_KERNEL_TIMER_SETUP - -static inline void -timer_setup(struct timer_list *timer, void (*func)(spl_timer_list_t), u32 fl) -{ -#ifdef HAVE_KERNEL_TIMER_LIST_FLAGS - 
(timer)->flags = fl; -#endif - init_timer(timer); - setup_timer(timer, func, (spl_timer_list_t)(timer)); -} - -#endif /* HAVE_KERNEL_TIMER_SETUP */ - #endif /* _SPL_TIMER_H */ diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 5e6ea8d3c221..5d483685eb20 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -33,6 +33,12 @@ #include #include #include +#include + +/* + * uio_extflg: extended flags + */ +#define UIO_DIRECT 0x0001 /* Direct I/O request */ #if defined(HAVE_VFS_IOV_ITER) && defined(HAVE_FAULT_IN_IOV_ITER_READABLE) #define iov_iter_fault_in_readable(a, b) fault_in_iov_iter_readable(a, b) @@ -54,6 +60,14 @@ typedef enum zfs_uio_seg { #endif } zfs_uio_seg_t; +/* + * This structures is used when doing Direct I/O. + */ +typedef struct { + struct page **pages; /* Mapped pages */ + long npages; /* Number of mapped pages */ +} zfs_uio_dio_t; + typedef struct zfs_uio { union { const struct iovec *uio_iov; @@ -62,15 +76,16 @@ typedef struct zfs_uio { struct iov_iter *uio_iter; #endif }; - int uio_iovcnt; - offset_t uio_loffset; - zfs_uio_seg_t uio_segflg; + int uio_iovcnt; /* Number of iovecs */ + offset_t uio_soffset; /* Starting logical offset */ + offset_t uio_loffset; /* Current logical offset */ + zfs_uio_seg_t uio_segflg; /* Segment type */ boolean_t uio_fault_disable; - uint16_t uio_fmode; - uint16_t uio_extflg; - ssize_t uio_resid; - - size_t uio_skip; + uint16_t uio_fmode; /* Access mode (unused) */ + uint16_t uio_extflg; /* Extra flags (UIO_DIRECT) */ + ssize_t uio_resid; /* Residual unprocessed bytes */ + size_t uio_skip; /* Skipped bytes in current iovec */ + zfs_uio_dio_t uio_dio; /* Direct I/O user pages */ struct request *rq; } zfs_uio_t; @@ -83,6 +98,7 @@ typedef struct zfs_uio { #define zfs_uio_iovlen(u, idx) (u)->uio_iov[(idx)].iov_len #define zfs_uio_iovbase(u, idx) (u)->uio_iov[(idx)].iov_base #define zfs_uio_fault_disable(u, set) (u)->uio_fault_disable = set +#define 
zfs_uio_soffset(u) (u)->uio_soffset #define zfs_uio_rlimit_fsize(z, u) (0) #define zfs_uio_fault_move(p, n, rw, u) zfs_uiomove((p), (n), (rw), (u)) @@ -94,6 +110,13 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) uio->uio_loffset = off; } +static inline void +zfs_uio_setsoffset(zfs_uio_t *uio, offset_t off) +{ + ASSERT3U(zfs_uio_offset(uio), ==, off); + zfs_uio_soffset(uio) = off; +} + static inline void zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { @@ -117,6 +140,8 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, uio->uio_extflg = 0; uio->uio_resid = resid; uio->uio_skip = skip; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } static inline void @@ -146,6 +171,8 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) } uio->rq = rq; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } #if defined(HAVE_VFS_IOV_ITER) @@ -162,8 +189,10 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset, uio->uio_extflg = 0; uio->uio_resid = resid; uio->uio_skip = skip; + uio->uio_soffset = uio->uio_loffset; + memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } -#endif +#endif /* HAVE_VFS_IOV_ITER */ #if defined(HAVE_ITER_IOV) #define zfs_uio_iter_iov(iter) iter_iov((iter)) diff --git a/include/os/linux/spl/sys/wait.h b/include/os/linux/spl/sys/wait.h index 65cd83e5ef12..78ec62b08155 100644 --- a/include/os/linux/spl/sys/wait.h +++ b/include/os/linux/spl/sys/wait.h @@ -27,28 +27,4 @@ #include #include -#ifndef HAVE_WAIT_ON_BIT_ACTION -#define spl_wait_on_bit(word, bit, mode) wait_on_bit(word, bit, mode) -#else - -static inline int -spl_bit_wait(void *word) -{ - schedule(); - return (0); -} - -#define spl_wait_on_bit(word, bit, mode) \ - wait_on_bit(word, bit, spl_bit_wait, mode) - -#endif /* HAVE_WAIT_ON_BIT_ACTION */ - -#ifdef HAVE_WAIT_QUEUE_ENTRY_T -typedef wait_queue_head_t spl_wait_queue_head_t; -typedef wait_queue_entry_t 
spl_wait_queue_entry_t; -#else -typedef wait_queue_head_t spl_wait_queue_head_t; -typedef wait_queue_t spl_wait_queue_entry_t; -#endif - #endif /* SPL_WAIT_H */ diff --git a/include/os/linux/spl/sys/wmsum.h b/include/os/linux/spl/sys/wmsum.h index 0871bd69504c..1c87f56e13ac 100644 --- a/include/os/linux/spl/sys/wmsum.h +++ b/include/os/linux/spl/sys/wmsum.h @@ -36,12 +36,7 @@ typedef struct percpu_counter wmsum_t; static inline void wmsum_init(wmsum_t *ws, uint64_t value) { - -#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP percpu_counter_init(ws, value, GFP_KERNEL); -#else - percpu_counter_init(ws, value); -#endif } static inline void @@ -62,11 +57,7 @@ static inline void wmsum_add(wmsum_t *ws, int64_t delta) { -#ifdef HAVE_PERCPU_COUNTER_ADD_BATCH percpu_counter_add_batch(ws, delta, INT_MAX / 2); -#else - __percpu_counter_add(ws, delta, INT_MAX / 2); -#endif } #ifdef __cplusplus diff --git a/include/os/linux/zfs/sys/abd_os.h b/include/os/linux/zfs/sys/abd_os.h index ce4f5a2bdf9b..606e8bf682e8 100644 --- a/include/os/linux/zfs/sys/abd_os.h +++ b/include/os/linux/zfs/sys/abd_os.h @@ -55,6 +55,9 @@ int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *, unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t); unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t); +__attribute__((malloc)) +abd_t *abd_alloc_from_pages(struct page **, unsigned long, uint64_t); + #ifdef __cplusplus } #endif diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h index 830c76e5743a..f34eb153f546 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -55,7 +55,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp, cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns); extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, int flags); -extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr); 
+extern int zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr); #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip, struct kstat *sp); diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h index 0be2c445ab76..cc8e5150eaf1 100644 --- a/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -47,16 +47,9 @@ extern "C" { #endif -#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) #define ZNODE_OS_FIELDS \ inode_timespec_t z_btime; /* creation/birth time (cached) */ \ struct inode z_inode; -#else -#define ZNODE_OS_FIELDS \ - inode_timespec_t z_btime; /* creation/birth time (cached) */ \ - struct inode z_inode; \ - boolean_t z_is_mapped; /* we are mmap'ed */ -#endif /* * Convert between znode pointers and inode pointers @@ -77,13 +70,8 @@ extern "C" { #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type)) #define Z_ISDIR(type) S_ISDIR(type) -#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) #define zn_has_cached_data(zp, start, end) \ filemap_range_has_page(ZTOI(zp)->i_mapping, start, end) -#else -#define zn_has_cached_data(zp, start, end) \ - ((zp)->z_is_mapped) -#endif #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync) #define zn_rlimit_fsize(size) (0) @@ -153,27 +141,14 @@ do { \ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ } while (0) -#if defined(HAVE_INODE_TIMESPEC64_TIMES) /* * Decode ZFS stored time values to a struct timespec64 - * 4.18 and newer kernels. */ #define ZFS_TIME_DECODE(tp, stmp) \ do { \ (tp)->tv_sec = (time64_t)(stmp)[0]; \ (tp)->tv_nsec = (long)(stmp)[1]; \ } while (0) -#else -/* - * Decode ZFS stored time values to a struct timespec - * 4.17 and older kernels. 
- */ -#define ZFS_TIME_DECODE(tp, stmp) \ -do { \ - (tp)->tv_sec = (time_t)(stmp)[0]; \ - (tp)->tv_nsec = (long)(stmp)[1]; \ -} while (0) -#endif /* HAVE_INODE_TIMESPEC64_TIMES */ #define ZFS_ACCESSTIME_STAMP(zfsvfs, zp) @@ -184,12 +159,6 @@ extern int zfs_inode_alloc(struct super_block *, struct inode **ip); extern void zfs_inode_destroy(struct inode *); extern void zfs_mark_inode_dirty(struct inode *); extern boolean_t zfs_relatime_need_update(const struct inode *); - -#if defined(HAVE_UIO_RW) -extern caddr_t zfs_map_page(page_t *, enum seg_rw); -extern void zfs_unmap_page(page_t *, caddr_t); -#endif /* HAVE_UIO_RW */ - extern zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE]; #ifdef __cplusplus diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 91a4751fffb0..c6e235c48ef3 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -42,21 +41,13 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr, zidmap_t *mnt_ns); extern const struct inode_operations zpl_inode_operations; -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER -extern const struct inode_operations_wrapper zpl_dir_inode_operations; -#else extern const struct inode_operations zpl_dir_inode_operations; -#endif extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; /* zpl_file.c */ extern const struct address_space_operations zpl_address_space_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND -extern const struct file_operations_extend zpl_file_operations; -#else extern const struct file_operations zpl_file_operations; -#endif extern const struct file_operations zpl_dir_file_operations; /* zpl_super.c */ @@ -70,8 +61,9 @@ extern struct file_system_type zpl_fs_type; extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size); extern int 
zpl_xattr_security_init(struct inode *ip, struct inode *dip, const struct qstr *qstr); + #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) + #if defined(HAVE_SET_ACL_IDMAP_DENTRY) extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); @@ -84,7 +76,7 @@ extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry, #else extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type); #endif /* HAVE_SET_ACL_USERNS */ -#endif /* HAVE_SET_ACL */ + #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL) extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu); #elif defined(HAVE_GET_ACL) @@ -118,73 +110,6 @@ extern const struct inode_operations zpl_ops_snapdir; extern const struct file_operations zpl_fops_shares; extern const struct inode_operations zpl_ops_shares; -#if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED) - -#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \ - .actor = _actor, \ - .pos = _pos, \ -} - -typedef struct dir_context zpl_dir_context_t; - -#define zpl_dir_emit dir_emit -#define zpl_dir_emit_dot dir_emit_dot -#define zpl_dir_emit_dotdot dir_emit_dotdot -#define zpl_dir_emit_dots dir_emit_dots - -#else - -typedef struct zpl_dir_context { - void *dirent; - const filldir_t actor; - loff_t pos; -} zpl_dir_context_t; - -#define ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) { \ - .dirent = _dirent, \ - .actor = _actor, \ - .pos = _pos, \ -} - -static inline bool -zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen, - uint64_t ino, unsigned type) -{ - return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type)); -} - -static inline bool -zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx) -{ - return (ctx->actor(ctx->dirent, ".", 1, ctx->pos, - file_inode(file)->i_ino, DT_DIR) == 0); -} - -static inline bool -zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx) -{ - return 
(ctx->actor(ctx->dirent, "..", 2, ctx->pos, - parent_ino(file_dentry(file)), DT_DIR) == 0); -} - -static inline bool -zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) -{ - if (ctx->pos == 0) { - if (!zpl_dir_emit_dot(file, ctx)) - return (false); - ctx->pos = 1; - } - if (ctx->pos == 1) { - if (!zpl_dir_emit_dotdot(file, ctx)) - return (false); - ctx->pos = 2; - } - return (true); -} -#endif /* HAVE_VFS_ITERATE */ - - /* zpl_file_range.c */ /* handlers for file_operations of the same name */ @@ -235,12 +160,9 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg); #if defined(HAVE_INODE_TIMESTAMP_TRUNCATE) #define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip) -#elif defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zpl_inode_timestamp_truncate(ts, ip) \ - timespec64_trunc(ts, (ip)->i_sb->s_time_gran) #else #define zpl_inode_timestamp_truncate(ts, ip) \ - timespec_trunc(ts, (ip)->i_sb->s_time_gran) + timespec64_trunc(ts, (ip)->i_sb->s_time_gran) #endif #if defined(HAVE_INODE_OWNER_OR_CAPABLE) diff --git a/include/sys/abd.h b/include/sys/abd.h index 567b88c0fc01..bd3a7bd7c935 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -46,6 +46,7 @@ typedef enum abd_flags { ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */ ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */ ABD_FLAG_ALLOCD = 1 << 8, /* we allocated the abd_t */ + ABD_FLAG_FROM_PAGES = 1 << 9, /* does not own pages */ } abd_flags_t; typedef struct abd { @@ -200,6 +201,12 @@ abd_get_size(abd_t *abd) return (abd->abd_size); } +static inline boolean_t +abd_is_from_pages(abd_t *abd) +{ + return ((abd->abd_flags & ABD_FLAG_FROM_PAGES) ? 
B_TRUE : B_FALSE); +} + /* * Module lifecycle * Defined in each specific OS's abd_os.c diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h index 1eb25d94adc5..35a64f8621a5 100644 --- a/include/sys/abd_impl.h +++ b/include/sys/abd_impl.h @@ -43,6 +43,9 @@ typedef enum abd_stats_op { /* forward declarations */ struct scatterlist; struct page; +#if defined(__FreeBSD__) && defined(_KERNEL) +struct sf_buf; +#endif struct abd_iter { /* public interface */ @@ -70,7 +73,11 @@ struct abd_iter { size_t iter_pos; size_t iter_offset; /* offset in current sg/abd_buf, */ /* abd_offset included */ +#if defined(__FreeBSD__) && defined(_KERNEL) + struct sf_buf *sf; /* used to map in vm_page_t FreeBSD */ +#else struct scatterlist *iter_sg; /* current sg */ +#endif }; extern abd_t *abd_zero_scatter; @@ -78,6 +85,7 @@ extern abd_t *abd_zero_scatter; abd_t *abd_gang_get_offset(abd_t *, size_t *); abd_t *abd_alloc_struct(size_t); void abd_free_struct(abd_t *); +void abd_init_struct(abd_t *); /* * OS specific functions @@ -108,9 +116,9 @@ void abd_iter_page(struct abd_iter *); #define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) #define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) -#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter) -#define ABD_LINEAR_BUF(abd) (abd->abd_u.abd_linear.abd_buf) -#define ABD_GANG(abd) (abd->abd_u.abd_gang) +#define ABD_SCATTER(abd) ((abd)->abd_u.abd_scatter) +#define ABD_LINEAR_BUF(abd) ((abd)->abd_u.abd_linear.abd_buf) +#define ABD_GANG(abd) ((abd)->abd_u.abd_gang) #ifdef __cplusplus } diff --git a/include/sys/arc.h b/include/sys/arc.h index c92b3eee618c..883c07b4ff3d 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -120,7 +120,7 @@ typedef enum arc_flags /* * Private ARC flags. These flags are private ARC only flags that - * will show up in b_flags in the arc_hdr_buf_t. These flags should + * will show up in b_flags in the arc_buf_hdr_t. These flags should * only be set by ARC code. 
*/ ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ @@ -179,7 +179,6 @@ typedef enum arc_flags ARC_FLAG_COMPRESS_4 = 1 << 28, ARC_FLAG_COMPRESS_5 = 1 << 29, ARC_FLAG_COMPRESS_6 = 1 << 30 - } arc_flags_t; typedef enum arc_buf_flags { diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 8b03b1f895f8..56741cd2a58b 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -61,17 +61,17 @@ extern "C" { /* * The simplified state transition diagram for dbufs looks like: * - * +--> READ --+ - * | | - * | V - * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) - * ^ | ^ ^ - * | | | | - * | +--> FILL --+ | - * | | | - * | | | - * | +------> NOFILL -----+ - * | | + * +-------> READ ------+ + * | | + * | V + * (alloc)-->UNCACHED CACHED-->EVICTING-->(free) + * ^ | ^ ^ + * | | | | + * | +-------> FILL ------+ | + * | | | | + * | | | | + * | +------> NOFILL -----+-----> UNCACHED + * | | (Direct I/O) * +---------------+ * * DB_SEARCH is an invalid state for a dbuf. It is used by dbuf_free_range @@ -176,6 +176,7 @@ typedef struct dbuf_dirty_record { uint8_t dr_copies; boolean_t dr_nopwrite; boolean_t dr_brtwrite; + boolean_t dr_diowrite; boolean_t dr_has_raw_params; /* @@ -384,7 +385,7 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, uint64_t blkid, uint64_t *hash_out); int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); -void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_clone_or_dio(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail); boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed); @@ -393,6 +394,8 @@ dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx); boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); +int dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp); 
+int dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa); arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db); void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, bp_embedded_type_t etype, enum zio_compress comp, @@ -473,7 +476,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg) (dbuf_is_metadata(_db) && \ ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) -boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db); +boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *db_bp); #ifdef ZFS_DEBUG diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 928f5f2b4fd4..22cbd7fc73b6 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -525,6 +525,7 @@ void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, #define WP_NOFILL 0x1 #define WP_DMU_SYNC 0x2 #define WP_SPILL 0x4 +#define WP_DIRECT_WR 0x8 void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, struct zio_prop *zp); @@ -589,6 +590,7 @@ int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, dmu_buf_t ***dbpp, uint32_t flags); int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag, dmu_buf_t **dbp); + /* * Add a reference to a dmu buffer that has already been held via * dmu_buf_hold() in the current context. 
@@ -873,16 +875,20 @@ int dmu_free_long_object(objset_t *os, uint64_t object); #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ #define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ +#define DMU_DIRECTIO 4 /* use Direct I/O */ + int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - void *buf, uint32_t flags); + void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, uint32_t flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx); -void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); +int dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx); +int dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx, uint32_t flags); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - dmu_tx_t *tx); + dmu_tx_t *tx); #ifdef _KERNEL int dmu_read_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size); int dmu_read_uio_dbuf(dmu_buf_t *zdb, zfs_uio_t *uio, uint64_t size); diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 83ae2b76ba1f..4eaa399407dd 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -35,6 +35,10 @@ #include #include #include +#include +#include +#include +#include #ifdef __cplusplus extern "C" { @@ -134,7 +138,7 @@ extern "C" { * db_data_pending * db_dirtied * db_link - * db_dirty_node (??) 
+ * db_dirty_records * db_dirtycnt * db_d.* * db.* @@ -150,8 +154,10 @@ extern "C" { * dbuf_find: none (db_holds) * dbuf_hash_insert: none (db_holds) * dmu_buf_read_array_impl: none (db_state, db_changed) - * dmu_sync: none (db_dirty_node, db_d) + * dmu_sync: none (db_dirty_records, db_d) * dnode_reallocate: none (db) + * dmu_write_direct: none (db_dirty_records, db_d) + * dmu_write_direct_done: none (db_dirty_records, db_d) * * dn_mtx (leaf) * protects: @@ -234,8 +240,9 @@ extern "C" { * dnode_new_blkid */ -struct objset; struct dmu_pool; +struct dmu_buf; +struct zgd; typedef struct dmu_sendstatus { list_node_t dss_link; @@ -245,9 +252,30 @@ typedef struct dmu_sendstatus { uint64_t dss_blocks; /* blocks visited during the sending process */ } dmu_sendstatus_t; +/* + * dmu_sync_{ready/done} args + */ +typedef struct { + dbuf_dirty_record_t *dsa_dr; + void (*dsa_done)(struct zgd *, int); + struct zgd *dsa_zgd; + dmu_tx_t *dsa_tx; +} dmu_sync_arg_t; + +void dmu_sync_done(zio_t *, arc_buf_t *buf, void *varg); +void dmu_sync_ready(zio_t *, arc_buf_t *buf, void *varg); + void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *); void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *); +int dmu_write_direct(zio_t *, dmu_buf_impl_t *, abd_t *, dmu_tx_t *); +int dmu_read_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t flags); +int dmu_write_abd(dnode_t *, uint64_t, uint64_t, abd_t *, uint32_t, dmu_tx_t *); +#if defined(_KERNEL) +int dmu_read_uio_direct(dnode_t *, zfs_uio_t *, uint64_t); +int dmu_write_uio_direct(dnode_t *, zfs_uio_t *, uint64_t, dmu_tx_t *); +#endif + #ifdef __cplusplus } #endif diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a9123e862af7..587dac738bae 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -134,6 +134,7 @@ struct objset { zfs_cache_type_t os_secondary_cache; zfs_prefetch_type_t os_prefetch; zfs_sync_type_t os_sync; + zfs_direct_t os_direct; zfs_redundant_metadata_type_t 
os_redundant_metadata; uint64_t os_recordsize; /* diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index c746600cd2d5..55b150c044ee 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -42,6 +42,7 @@ extern "C" { #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_DELAY "delay" #define FM_EREPORT_ZFS_DEADMAN "deadman" +#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify" #define FM_EREPORT_ZFS_POOL "zpool" #define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" #define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" @@ -84,6 +85,7 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T "vdev_io_t" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_N "vdev_slow_io_n" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_SLOW_IO_T "vdev_slow_io_t" +#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DIO_VERIFY_ERRORS "dio_verify_errors" #define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS "vdev_delays" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid" #define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index fc4f22cd5304..3852fa03173c 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -193,6 +193,7 @@ typedef enum { ZFS_PROP_SNAPSHOTS_CHANGED, ZFS_PROP_PREFETCH, ZFS_PROP_VOLTHREADING, + ZFS_PROP_DIRECT, ZFS_NUM_PROPS } zfs_prop_t; @@ -533,6 +534,12 @@ typedef enum { ZFS_VOLMODE_NONE = 3 } zfs_volmode_t; +typedef enum { + ZFS_DIRECT_DISABLED = 0, + ZFS_DIRECT_STANDARD, + ZFS_DIRECT_ALWAYS +} zfs_direct_t; + typedef enum zfs_keystatus { ZFS_KEYSTATUS_NONE = 0, ZFS_KEYSTATUS_UNAVAILABLE, @@ -790,6 +797,9 @@ typedef struct zpool_load_policy { /* Number of slow IOs */ #define ZPOOL_CONFIG_VDEV_SLOW_IOS "vdev_slow_ios" +/* Number of Direct I/O write verify errors */ +#define ZPOOL_CONFIG_VDEV_DIO_VERIFY_ERRORS "vdev_dio_verify_errors" + /* vdev enclosure sysfs path */ #define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path" @@ -1262,6 +1272,7 @@ typedef struct vdev_stat { uint64_t vs_physical_ashift; /* 
vdev_physical_ashift */ uint64_t vs_noalloc; /* allocations halted? */ uint64_t vs_pspace; /* physical capacity */ + uint64_t vs_dio_verify_errors; /* DIO write verify errors */ } vdev_stat_t; #define VDEV_STAT_VALID(field, uint64_t_field_count) \ diff --git a/include/sys/spa.h b/include/sys/spa.h index aa66d489ef1a..ca30b60c0af7 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -949,6 +949,14 @@ typedef struct spa_iostats { kstat_named_t simple_trim_bytes_skipped; kstat_named_t simple_trim_extents_failed; kstat_named_t simple_trim_bytes_failed; + kstat_named_t arc_read_count; + kstat_named_t arc_read_bytes; + kstat_named_t arc_write_count; + kstat_named_t arc_write_bytes; + kstat_named_t direct_read_count; + kstat_named_t direct_read_bytes; + kstat_named_t direct_write_count; + kstat_named_t direct_write_bytes; } spa_iostats_t; extern void spa_stats_init(spa_t *spa); @@ -972,6 +980,10 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type, uint64_t extents_written, uint64_t bytes_written, uint64_t extents_skipped, uint64_t bytes_skipped, uint64_t extents_failed, uint64_t bytes_failed); +extern void spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, + uint32_t flags); +extern void spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, + uint32_t flags); extern void spa_import_progress_add(spa_t *spa); extern void spa_import_progress_remove(uint64_t spa_guid); extern int spa_import_progress_set_mmp_check(uint64_t pool_guid, diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h index aa34edda5f6a..90b80127246c 100644 --- a/include/sys/uio_impl.h +++ b/include/sys/uio_impl.h @@ -40,10 +40,47 @@ #define _SYS_UIO_IMPL_H #include +#include extern int zfs_uiomove(void *, size_t, zfs_uio_rw_t, zfs_uio_t *); extern int zfs_uiocopy(void *, size_t, zfs_uio_rw_t, zfs_uio_t *, size_t *); extern void zfs_uioskip(zfs_uio_t *, size_t); +extern void zfs_uio_free_dio_pages(zfs_uio_t *, zfs_uio_rw_t); +extern int 
zfs_uio_get_dio_pages_alloc(zfs_uio_t *, zfs_uio_rw_t); +extern boolean_t zfs_uio_page_aligned(zfs_uio_t *); + +static inline boolean_t +zfs_dio_page_aligned(void *buf) +{ + return ((((uintptr_t)(buf) & (PAGESIZE - 1)) == 0) ? + B_TRUE : B_FALSE); +} + +static inline boolean_t +zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz) +{ + return (IS_P2ALIGNED(offset, blksz)); +} + +static inline boolean_t +zfs_dio_size_aligned(uint64_t size, uint64_t blksz) +{ + return ((size % blksz) == 0); +} + +static inline boolean_t +zfs_dio_aligned(uint64_t offset, uint64_t size, uint64_t blksz) +{ + return (zfs_dio_offset_aligned(offset, blksz) && + zfs_dio_size_aligned(size, blksz)); +} + +static inline boolean_t +zfs_uio_aligned(zfs_uio_t *uio, uint64_t blksz) +{ + return (zfs_dio_aligned(zfs_uio_offset(uio), zfs_uio_resid(uio), + blksz)); +} static inline void zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 57ff31e89eb9..abd66b8abc96 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -448,9 +448,14 @@ struct vdev { /* * We rate limit ZIO delay, deadman, and checksum events, since they * can flood ZED with tons of events when a drive is acting up. + * + * We also rate limit Direct I/O write verify errors, since a user might + * be continually manipulating a buffer that can flood ZED with tons of + * events. 
*/ zfs_ratelimit_t vdev_delay_rl; zfs_ratelimit_t vdev_deadman_rl; + zfs_ratelimit_t vdev_dio_verify_rl; zfs_ratelimit_t vdev_checksum_rl; /* @@ -649,6 +654,11 @@ extern uint_t zfs_vdev_max_auto_ashift; int param_set_min_auto_ashift(ZFS_MODULE_PARAM_ARGS); int param_set_max_auto_ashift(ZFS_MODULE_PARAM_ARGS); +/* + * VDEV checksum verification for Direct I/O writes + */ +extern uint_t zfs_vdev_direct_write_verify; + #ifdef __cplusplus } #endif diff --git a/include/sys/zfs_racct.h b/include/sys/zfs_racct.h index 0e8bd04c1a13..ff84cccb09a1 100644 --- a/include/sys/zfs_racct.h +++ b/include/sys/zfs_racct.h @@ -26,12 +26,13 @@ #ifndef _SYS_ZFS_RACCT_H #define _SYS_ZFS_RACCT_H -#include +#include +#include /* * Platform-dependent resource accounting hooks */ -void zfs_racct_read(uint64_t size, uint64_t iops); -void zfs_racct_write(uint64_t size, uint64_t iops); +void zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags); +void zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags); #endif /* _SYS_ZFS_RACCT_H */ diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d71144807f47..c852c4758a91 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -308,7 +308,7 @@ extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, const char *dname, znode_t *szp, znode_t *wzp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t len, boolean_t commit, - zil_callback_t callback, void *callback_data); + boolean_t o_direct, zil_callback_t callback, void *callback_data); extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, uint64_t off, uint64_t len); extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, diff --git a/include/sys/zio.h b/include/sys/zio.h index 3a756949a422..628416e98eb9 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -225,6 +225,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_NOPWRITE 
(1ULL << 28) #define ZIO_FLAG_REEXECUTED (1ULL << 29) #define ZIO_FLAG_DELEGATED (1ULL << 30) +#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) @@ -355,6 +356,7 @@ typedef struct zio_prop { boolean_t zp_brtwrite; boolean_t zp_encrypt; boolean_t zp_byteorder; + boolean_t zp_direct_write; uint8_t zp_salt[ZIO_DATA_SALT_LEN]; uint8_t zp_iv[ZIO_DATA_IV_LEN]; uint8_t zp_mac[ZIO_DATA_MAC_LEN]; diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 2c846a5d41f6..a5e3ab2384ba 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -160,8 +160,9 @@ enum zio_stage { ZIO_STAGE_VDEV_IO_ASSESS = 1 << 23, /* RW--XT */ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 24, /* R----- */ + ZIO_STAGE_DIO_CHECKSUM_VERIFY = 1 << 25, /* -W---- */ - ZIO_STAGE_DONE = 1 << 25 /* RWFCXT */ + ZIO_STAGE_DONE = 1 << 26 /* RWFCXT */ }; #define ZIO_ROOT_PIPELINE \ @@ -227,6 +228,10 @@ enum zio_stage { ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) +#define ZIO_DIRECT_WRITE_PIPELINE \ + (ZIO_WRITE_PIPELINE & \ + ~ZIO_STAGE_ISSUE_ASYNC) + #define ZIO_DDT_CHILD_WRITE_PIPELINE \ (ZIO_INTERLOCK_STAGES | \ ZIO_VDEV_IO_STAGES | \ diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 665bfc42301b..2cb0107d58f5 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -82,6 +82,32 @@ typedef struct zfs_uio { #define zfs_uio_iovlen(uio, idx) (uio)->uio_iov[(idx)].iov_len #define zfs_uio_iovbase(uio, idx) (uio)->uio_iov[(idx)].iov_base +static inline boolean_t +zfs_dio_page_aligned(void *buf) +{ + return ((((unsigned long)(buf) & (PAGESIZE - 1)) == 0) ? 
+ B_TRUE : B_FALSE); +} + +static inline boolean_t +zfs_dio_offset_aligned(uint64_t offset, uint64_t blksz) +{ + return (IS_P2ALIGNED(offset, blksz)); +} + +static inline boolean_t +zfs_dio_size_aligned(uint64_t size, uint64_t blksz) +{ + return ((size % blksz) == 0); +} + +static inline boolean_t +zfs_dio_aligned(uint64_t offset, uint64_t size, uint64_t blksz) +{ + return (zfs_dio_offset_aligned(offset, blksz) && + zfs_dio_size_aligned(size, blksz)); +} + static inline void zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) { diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 51b29643ee0c..5b0dffb03f49 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -2,6 +2,7 @@ + @@ -761,6 +762,25 @@ + + + + + + + + + + + + + + + + + + + @@ -787,9 +807,8 @@ - + - @@ -803,6 +822,12 @@ + + + + + + @@ -1260,18 +1285,16 @@ - + - - + - @@ -1289,6 +1312,19 @@ + + + + + + + + + + + + + @@ -1437,6 +1473,10 @@ + + + + @@ -1570,10 +1610,6 @@ - - - - @@ -1686,6 +1722,7 @@ + @@ -1880,7 +1917,8 @@ - + + @@ -1928,54 +1966,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2031,6 +2021,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -2195,6 +2233,9 @@ + + + @@ -2358,6 +2399,14 @@ + + + + + + + + @@ -2374,6 +2423,13 @@ + + + + + + + @@ -2441,7 +2497,6 @@ - @@ -2790,14 +2845,6 @@ - - - - - - - - @@ -2811,6 +2858,11 @@ + + + + + @@ -2966,18 +3018,6 @@ - - - - - - - - - - - - @@ -3298,6 +3338,18 @@ + + + + + + + + + + + + @@ -3499,18 +3551,42 @@ + + + + + + + + + + + + + + + + + + - + - + + + + + + + @@ -3532,6 +3608,12 @@ + + + + + + @@ -3556,6 +3638,12 @@ + + + + + + @@ -3567,40 +3655,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3771,6 +3825,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -3798,85 +3901,36 @@ - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - + + - - + + + + + + + - + - + - - - - + - + - - - - + - @@ -4377,11 +4431,15 @@ - + + - - - + + + + + + @@ -4412,6 +4470,12 @@ + + + + + + @@ -4424,19 +4488,6 @@ - - - - - - - - - - - - - @@ -5102,26 +5153,31 @@ - + + + + + + + + + + + + - + - - - - - - @@ -5322,9 +5378,6 @@ - - - @@ -5438,6 +5491,9 @@ + + + @@ -5509,14 +5565,14 @@ - - - - + + + + @@ -5528,19 +5584,13 @@ - + + - - - - - - - + - @@ -6209,6 +6259,10 @@ + + + + @@ -6347,6 +6401,11 @@ + + + + + @@ -6359,17 +6418,6 @@ - - - - - - - - - - - @@ -6380,6 +6428,11 @@ + + + + + @@ -7407,23 +7460,6 @@ - - - - - - - - - - - - - - - - - @@ -7470,6 +7506,23 @@ + + + + + + + + + + + + + + + + + @@ -7664,6 +7717,12 @@ + + + + + + @@ -7681,6 +7740,12 @@ + + + + + + @@ -7725,12 +7790,6 @@ - - - - - - @@ -8032,10 +8091,6 @@ - - - - @@ -8103,6 +8158,11 @@ + + + + + @@ -8120,9 +8180,26 @@ - + + + + + + + + + + + + + + + + + + + - @@ -8159,6 +8236,12 @@ + + + + + + @@ -8198,31 +8281,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - @@ -8393,9 +8451,6 @@ - - - @@ -8409,6 +8464,9 @@ + + + @@ -8594,12 +8652,11 @@ - - - + + + - - + @@ -8611,6 +8668,12 @@ + + + + + + @@ -8659,11 +8722,10 @@ - + - @@ -8689,10 +8751,10 @@ + - @@ -9233,7 +9295,6 @@ - @@ -9251,6 +9312,7 @@ + diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index ff30af7d2b9f..cfe3cf9aa4f0 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -87,6 +87,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/ddt_zap.c \ module/zfs/dmu.c \ module/zfs/dmu_diff.c \ + module/zfs/dmu_direct.c \ module/zfs/dmu_object.c \ module/zfs/dmu_objset.c \ module/zfs/dmu_recv.c \ diff --git a/lib/libzpool/abd_os.c b/lib/libzpool/abd_os.c index 5a91605b2fe3..8531b8f40ace 100644 --- a/lib/libzpool/abd_os.c +++ b/lib/libzpool/abd_os.c @@ -363,3 +363,67 @@ void abd_cache_reap_now(void) { } + +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the 
buffer. If the ABD is scattered, this will allocate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead. + */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + if (!abd_is_linear(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * not change the contents of the ABD and will ASSERT that you didn't modify + * the buffer since it was borrowed. If you want any changes you made to buf to + * be copied back to abd, use abd_return_buf_copy() instead. + */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 20bb95c1aeea..1f6be3963a3b 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -291,6 +291,14 @@ Default dnode block size as a power of 2. .It Sy zfs_default_ibs Ns = Ns Sy 17 Po 128 KiB Pc Pq int Default dnode indirect block size as a power of 2. . +.It Sy zfs_dio_enabled Ns = Ns Sy 0 Ns | Ns 1 Pq int +Enable Direct I/O. 
If this setting is 0, then all I/O requests will be directed through the ARC +acting as though the dataset property +.Sy direct +was set to +.Sy disabled . +. .It Sy zfs_history_output_max Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64 When attempting to log an output nvlist of an ioctl in the on-disk history, the output will not be stored if it is larger than this size (in bytes). @@ -416,6 +424,26 @@ May be increased up to .Sy ASHIFT_MAX Po 16 Pc , but this may negatively impact pool space efficiency. . +.It Sy zfs_vdev_direct_write_verify Ns = Ns Sy Linux 1 | FreeBSD 0 Pq uint +If non-zero, then a Direct I/O write's checksum will be verified every +time the write is issued and before it is committed to the block pointer. +In the event the checksum is not valid then the I/O operation will return EIO. +This module parameter can be used to detect if the +contents of the user's buffer have changed in the process of doing a Direct I/O +write. +It can also help to identify if reported checksum errors are tied to Direct I/O +writes. +Each verify error causes a +.Sy dio_verify +zevent. +Direct I/O write checksum verify errors can be seen with +.Nm zpool Cm status Fl d . +The default value for this is 1 on Linux, but is 0 for +.Fx +because user pages can be placed under write protection in +.Fx +before the Direct I/O write is issued. +. .It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq uint Minimum ashift used when creating new top-level vdevs. . @@ -1093,6 +1121,9 @@ This will smoothly handle between ten times and a tenth of this number. .Pp .Sy zfs_delay_scale No \(mu Sy zfs_dirty_data_max Em must No be smaller than Sy 2^64 . . +.It Sy zfs_dio_write_verify_events_per_second Ns = Ns Sy 20 Ns /s Pq uint +Rate limit Direct I/O write verify events to this many per second. +. .It Sy zfs_disable_ivset_guid_check Ns = Ns Sy 0 Ns | Ns 1 Pq int Disables requirement for IVset GUIDs to be present and match when doing a raw receive of encrypted datasets. 
diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index f7026119b730..fa228e9bd7fe 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1039,6 +1039,48 @@ See the section of .Xr zfsconcepts 7 . .It Xo +.Sy direct Ns = Ns Sy disabled Ns | Ns Sy standard Ns | Ns Sy always +.Xc +Controls the behavior of Direct I/O requests +.Pq e.g. Dv O_DIRECT . +The +.Sy standard +behavior for Direct I/O requests is to bypass the ARC when possible. +These requests will not be cached and performance will be limited by the +raw speed of the underlying disks +.Pq Dv this is the default . +.Sy always +causes every properly aligned read or write to be treated as a direct request. +.Sy disabled +causes the O_DIRECT flag to be silently ignored and all direct requests will +be handled by the ARC. +This is the default behavior for OpenZFS 2.2 and prior releases. +.Pp +Bypassing the ARC requires that a direct request be correctly aligned. +For write requests the starting offset and size of the request must be +.Sy recordsize Ns +-aligned, if not then the unaligned portion of the request will be silently +redirected through the ARC. +For read requests there is no +.Sy recordsize +alignment restriction on either the starting offset or size. +All direct requests must use a page-aligned memory buffer and the request +size must be a multiple of the page size or an error is returned. +.Pp +Concurrently mixing buffered and direct requests to overlapping regions of +a file can decrease performance. +However, the resulting file will always be coherent. +For example, a direct read after a buffered write will return the data +from the buffered write. +Furthermore, if an application uses +.Xr mmap 2 +based file access then in order to maintain coherency all direct requests +are converted to buffered requests while the file is mapped. +Currently Direct I/O is not supported with zvols. +If dedup is enabled on a dataset, Direct I/O writes will not check for +deduplication. 
+Deduplication and Direct I/O writes are currently incompatible. +.It Xo .Sy dnodesize Ns = Ns Sy legacy Ns | Ns Sy auto Ns | Ns Sy 1k Ns | Ns .Sy 2k Ns | Ns Sy 4k Ns | Ns Sy 8k Ns | Ns Sy 16k .Xc diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8 index ef20ef4e003c..234612baea8d 100644 --- a/man/man8/zpool-events.8 +++ b/man/man8/zpool-events.8 @@ -98,6 +98,17 @@ This can be an indicator of problems with the underlying storage device. The number of delay events is ratelimited by the .Sy zfs_slow_io_events_per_second module parameter. +.It Sy dio_verify +Issued when there was a checksum verify error after a Direct I/O write has been +issued. +This event can only take place if the module parameter +.Sy zfs_vdev_direct_write_verify +is not set to zero. +See +.Xr zfs 4 +for more details on the +.Sy zfs_vdev_direct_write_verify +module parameter. .It Sy config Issued every time a vdev change have been done to the pool. .It Sy zpool @@ -408,8 +419,9 @@ ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--XT ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--XT ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R----- +ZIO_STAGE_DIO_CHECKSUM_VERIFY:0x02000000:-W---- -ZIO_STAGE_DONE:0x02000000:RWFCXT +ZIO_STAGE_DONE:0x04000000:RWFCXT .TE . .Sh I/O FLAGS diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index b40faeb9977f..868fc4414dbb 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -36,7 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm status -.Op Fl DegiLpPstvx +.Op Fl dDegiLpPstvx .Op Fl T Sy u Ns | Ns Sy d .Op Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … .Oo Ar pool Oc Ns … @@ -81,6 +81,15 @@ to display vdevs in flat hierarchy instead of nested vdev objects. Specify .Sy --json-pool-key-guid to set pool GUID as key for pool objects instead of pool names. +.It Fl d +Display the number of Direct I/O write checksum verify errors that have occurred +on a top-level VDEV. 
+See +.Sx zfs_vdev_direct_write_verify +in +.Xr zfs 4 +for details about the conditions that can cause Direct I/O write checksum +verify failures to occur. .It Fl D Display a histogram of deduplication statistics, showing the allocated .Pq physically present on disk diff --git a/module/Kbuild.in b/module/Kbuild.in index 0472a9348c13..d96347bad438 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -327,6 +327,7 @@ ZFS_OBJS := \ ddt_stats.o \ ddt_zap.o \ dmu.o \ + dmu_direct.o \ dmu_diff.o \ dmu_object.o \ dmu_objset.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 9161204c99d3..188f5ad2d1ca 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -257,6 +257,7 @@ SRCS+= abd.c \ ddt_stats.c \ ddt_zap.c \ dmu.c \ + dmu_direct.c \ dmu_diff.c \ dmu_object.c \ dmu_objset.c \ diff --git a/module/os/freebsd/spl/spl_uio.c b/module/os/freebsd/spl/spl_uio.c index 17886cbeb501..74cbe36bbd9b 100644 --- a/module/os/freebsd/spl/spl_uio.c +++ b/module/os/freebsd/spl/spl_uio.c @@ -44,6 +44,10 @@ #include #include #include +#include +#include +#include +#include static void zfs_freeuio(struct uio *uio) @@ -115,3 +119,200 @@ zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) ASSERT3U(zfs_uio_rw(uio), ==, dir); return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio))); } + +/* + * Check if the uio is page-aligned in memory. 
+ */ +boolean_t +zfs_uio_page_aligned(zfs_uio_t *uio) +{ + const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov; + + for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) { + uintptr_t addr = (uintptr_t)iov->iov_base; + size_t size = iov->iov_len; + if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + return (B_FALSE); + } + } + + return (B_TRUE); +} + +static void +zfs_uio_set_pages_to_stable(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + ASSERT3S(uio->uio_dio.npages, >, 0); + + for (int i = 0; i < uio->uio_dio.npages; i++) { + vm_page_t page = uio->uio_dio.pages[i]; + ASSERT3P(page, !=, NULL); + + MPASS(page == PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(page))); + vm_page_busy_acquire(page, VM_ALLOC_SBUSY); + pmap_remove_write(page); + } +} + +static void +zfs_uio_release_stable_pages(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + for (int i = 0; i < uio->uio_dio.npages; i++) { + vm_page_t page = uio->uio_dio.pages[i]; + + ASSERT3P(page, !=, NULL); + vm_page_sunbusy(page); + } +} + +/* + * If the operation is marked as read, then we are stating the pages will be + * written to and must be given write access. + */ +static int +zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages, + zfs_uio_rw_t rw, vm_page_t *pages) +{ + vm_map_t map; + vm_prot_t prot; + int count; + + map = &curthread->td_proc->p_vmspace->vm_map; + ASSERT3S(len, >, 0); + + prot = rw == UIO_READ ? 
(VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; + count = vm_fault_quick_hold_pages(map, start, len, prot, pages, + nr_pages); + + return (count); +} + +void +zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3P(uio->uio_dio.pages, !=, NULL); + ASSERT(zfs_uio_rw(uio) == rw); + + if (rw == UIO_WRITE) + zfs_uio_release_stable_pages(uio); + + vm_page_unhold_pages(&uio->uio_dio.pages[0], + uio->uio_dio.npages); + + kmem_free(uio->uio_dio.pages, + uio->uio_dio.npages * sizeof (vm_page_t)); +} + +static int +zfs_uio_get_user_pages(unsigned long start, int nr_pages, + size_t len, zfs_uio_rw_t rw, vm_page_t *pages) +{ + int count; + + count = zfs_uio_hold_pages(start, len, nr_pages, rw, pages); + + if (count != nr_pages) { + if (count > 0) + vm_page_unhold_pages(pages, count); + return (0); + } + + ASSERT3S(count, ==, nr_pages); + + return (count); +} + +static int +zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages) +{ + unsigned long addr = (unsigned long)(v.iov_base); + size_t len = v.iov_len; + int n = DIV_ROUND_UP(len, PAGE_SIZE); + + int res = zfs_uio_get_user_pages( + P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len, + zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]); + + if (res != n) + return (SET_ERROR(EFAULT)); + + ASSERT3U(len, ==, res * PAGE_SIZE); + *numpages = res; + return (0); +} + +static int +zfs_uio_get_dio_pages_impl(zfs_uio_t *uio) +{ + const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov; + size_t len = zfs_uio_resid(uio); + + for (int i = 0; i < zfs_uio_iovcnt(uio); i++) { + struct iovec iov; + int numpages = 0; + + if (iovp->iov_len == 0) { + iovp++; + continue; + } + iov.iov_len = MIN(len, iovp->iov_len); + iov.iov_base = iovp->iov_base; + int error = zfs_uio_iov_step(iov, uio, &numpages); + + if (error) + return (error); + + uio->uio_dio.npages += numpages; + len -= iov.iov_len; + iovp++; + } + + ASSERT0(len); + + return (0); +} + +/* + * This function holds 
user pages into the kernel. In the event that the user + * pages are not successfully held an error value is returned. + * + * On success, 0 is returned. + */ +int +zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + int error = 0; + int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE); + size_t size = npages * sizeof (vm_page_t); + + ASSERT(zfs_uio_rw(uio) == rw); + + uio->uio_dio.pages = kmem_alloc(size, KM_SLEEP); + + error = zfs_uio_get_dio_pages_impl(uio); + + if (error) { + vm_page_unhold_pages(&uio->uio_dio.pages[0], + uio->uio_dio.npages); + kmem_free(uio->uio_dio.pages, size); + return (error); + } + + ASSERT3S(uio->uio_dio.npages, >, 0); + + /* + * Since we will be writing the user pages we must make sure that + * they are stable. That way the contents of the pages can not change + * while we are doing: compression, checksumming, encryption, parity + * calculations or deduplication. + */ + if (zfs_uio_rw(uio) == UIO_WRITE) + zfs_uio_set_pages_to_stable(uio); + + uio->uio_extflg |= UIO_DIRECT; + + return (0); +} diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c index f24ea3dc7685..f20dc5d8c325 100644 --- a/module/os/freebsd/zfs/abd_os.c +++ b/module/os/freebsd/zfs/abd_os.c @@ -32,6 +32,7 @@ #include #include #include +#include typedef struct abd_stats { kstat_named_t abdstat_struct_size; @@ -135,7 +136,9 @@ abd_size_alloc_linear(size_t size) void abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) { - uint_t n = abd_scatter_chunkcnt(abd); + uint_t n; + + n = abd_scatter_chunkcnt(abd); ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); int waste = (n << PAGE_SHIFT) - abd->abd_size; if (op == ABDSTAT_INCR) { @@ -198,10 +201,16 @@ abd_free_chunks(abd_t *abd) { uint_t i, n; - n = abd_scatter_chunkcnt(abd); - for (i = 0; i < n; i++) { - kmem_cache_free(abd_chunk_cache, - ABD_SCATTER(abd).abd_chunks[i]); + /* + * Scatter ABDs may be constructed by abd_alloc_from_pages() from + * an array of pages. 
In which case they should not be freed. + */ + if (!abd_is_from_pages(abd)) { + n = abd_scatter_chunkcnt(abd); + for (i = 0; i < n; i++) { + kmem_cache_free(abd_chunk_cache, + ABD_SCATTER(abd).abd_chunks[i]); + } } } @@ -342,11 +351,8 @@ abd_fini(void) void abd_free_linear_page(abd_t *abd) { - /* - * FreeBSD does not have scatter linear pages - * so there is an error. - */ - VERIFY(0); + ASSERT3P(abd->abd_u.abd_linear.sf, !=, NULL); + zfs_unmap_page(abd->abd_u.abd_linear.sf); } /* @@ -365,6 +371,26 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata) return (abd_alloc_linear(size, is_metadata)); } +static abd_t * +abd_get_offset_from_pages(abd_t *abd, abd_t *sabd, size_t chunkcnt, + size_t new_offset) +{ + ASSERT(abd_is_from_pages(sabd)); + + /* + * Set the child chunks to point at the parent chunks as + * the chunks are just pages and we don't want to copy them. + */ + size_t parent_offset = new_offset / PAGE_SIZE; + ASSERT3U(parent_offset, <, abd_scatter_chunkcnt(sabd)); + for (int i = 0; i < chunkcnt; i++) + ABD_SCATTER(abd).abd_chunks[i] = + ABD_SCATTER(sabd).abd_chunks[parent_offset + i]; + + abd->abd_flags |= ABD_FLAG_FROM_PAGES; + return (abd); +} + abd_t * abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, size_t size) @@ -399,6 +425,11 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, ABD_SCATTER(abd).abd_offset = new_offset & PAGE_MASK; + if (abd_is_from_pages(sabd)) { + return (abd_get_offset_from_pages(abd, sabd, chunkcnt, + new_offset)); + } + /* Copy the scatterlist starting at the correct offset */ (void) memcpy(&ABD_SCATTER(abd).abd_chunks, &ABD_SCATTER(sabd).abd_chunks[new_offset >> PAGE_SHIFT], @@ -407,6 +438,44 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, return (abd); } +/* + * Allocate a scatter ABD structure from user pages. 
+ */ +abd_t * +abd_alloc_from_pages(vm_page_t *pages, unsigned long offset, uint64_t size) +{ + VERIFY3U(size, <=, DMU_MAX_ACCESS); + ASSERT3U(offset, <, PAGE_SIZE); + ASSERT3P(pages, !=, NULL); + + abd_t *abd = abd_alloc_struct(size); + abd->abd_flags |= ABD_FLAG_OWNER | ABD_FLAG_FROM_PAGES; + abd->abd_size = size; + + if ((offset + size) <= PAGE_SIZE) { + /* + * There is only a single page worth of data, so we will just + * use a linear ABD. We have to make sure to take into account + * the offset though. In all other cases our offset will be 0 + * as we are always PAGE_SIZE aligned. + */ + abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE; + ABD_LINEAR_BUF(abd) = (char *)zfs_map_page(pages[0], + &abd->abd_u.abd_linear.sf) + offset; + } else { + ABD_SCATTER(abd).abd_offset = offset; + ASSERT0(ABD_SCATTER(abd).abd_offset); + + /* + * Setting the ABD's abd_chunks to point to the user pages. + */ + for (int i = 0; i < abd_chunkcnt_for_bytes(size); i++) + ABD_SCATTER(abd).abd_chunks[i] = pages[i]; + } + + return (abd); +} + /* * Initialize the abd_iter. 
*/ @@ -468,6 +537,16 @@ abd_iter_map(struct abd_iter *aiter) if (abd_is_linear(abd)) { aiter->iter_mapsize = abd->abd_size - offset; paddr = ABD_LINEAR_BUF(abd); + } else if (abd_is_from_pages(abd)) { + aiter->sf = NULL; + offset += ABD_SCATTER(abd).abd_offset; + size_t index = offset / PAGE_SIZE; + offset &= PAGE_MASK; + aiter->iter_mapsize = MIN(PAGE_SIZE - offset, + abd->abd_size - aiter->iter_pos); + paddr = zfs_map_page( + ABD_SCATTER(aiter->iter_abd).abd_chunks[index], + &aiter->sf); } else { offset += ABD_SCATTER(abd).abd_offset; paddr = ABD_SCATTER(abd).abd_chunks[offset >> PAGE_SHIFT]; @@ -490,6 +569,12 @@ abd_iter_unmap(struct abd_iter *aiter) ASSERT3U(aiter->iter_mapsize, >, 0); } + if (abd_is_from_pages(aiter->iter_abd) && + !abd_is_linear_page(aiter->iter_abd)) { + ASSERT3P(aiter->sf, !=, NULL); + zfs_unmap_page(aiter->sf); + } + aiter->iter_mapaddr = NULL; aiter->iter_mapsize = 0; } @@ -499,3 +584,67 @@ abd_cache_reap_now(void) { kmem_cache_reap_soon(abd_chunk_cache); } + +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the buffer. If the ABD is scattered, this will allocate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead. + */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + if (!abd_is_linear(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * not change the contents of the ABD and will ASSERT that you didn't modify + * the buffer since it was borrowed.
If you want any changes you made to buf to + * be copied back to abd, use abd_return_buf_copy() instead. + */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} diff --git a/module/os/freebsd/zfs/zfs_racct.c b/module/os/freebsd/zfs/zfs_racct.c index 883255bc1901..2989a9af9235 100644 --- a/module/os/freebsd/zfs/zfs_racct.c +++ b/module/os/freebsd/zfs/zfs_racct.c @@ -27,7 +27,7 @@ #include void -zfs_racct_read(uint64_t size, uint64_t iops) +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { curthread->td_ru.ru_inblock += iops; #ifdef RACCT @@ -40,10 +40,12 @@ zfs_racct_read(uint64_t size, uint64_t iops) #else (void) size; #endif /* RACCT */ + + spa_iostats_read_add(spa, size, iops, flags); } void -zfs_racct_write(uint64_t size, uint64_t iops) +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { curthread->td_ru.ru_oublock += iops; #ifdef RACCT @@ -56,4 +58,6 @@ zfs_racct_write(uint64_t size, uint64_t iops) #else (void) size; #endif /* RACCT */ + + spa_iostats_write_add(spa, size, iops, flags); } diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 01b964f98f3a..5dbca10a3ee3 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4131,7 +4131,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, * but that would make the locking messier */ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, - len, commit, NULL, NULL); + len, commit, B_FALSE, NULL, NULL); 
zfs_vmobject_wlock(object); for (i = 0; i < ncount; i++) { @@ -4266,6 +4266,8 @@ ioflags(int ioflags) flags |= O_APPEND; if (ioflags & IO_NDELAY) flags |= O_NONBLOCK; + if (ioflags & IO_DIRECT) + flags |= O_DIRECT; if (ioflags & IO_SYNC) flags |= O_SYNC; @@ -4285,9 +4287,36 @@ static int zfs_freebsd_read(struct vop_read_args *ap) { zfs_uio_t uio; + int error = 0; zfs_uio_init(&uio, ap->a_uio); - return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), - ap->a_cred)); + error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag), + ap->a_cred); + /* + * XXX We occasionally get an EFAULT for Direct I/O reads on + * FreeBSD 13. This still needs to be resolved. The EFAULT comes + * from: + * zfs_uio_get_dio_pages_alloc() -> + * zfs_uio_get_dio_pages_impl() -> + * zfs_uio_iov_step() -> + * zfs_uio_get_user_pages(). + * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O + * read fails to map in the user pages (returning EFAULT) the + * Direct I/O request is broken up into two separate IO requests + * and issued separately using Direct I/O.
+ */ +#ifdef ZFS_DEBUG + if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) { +#if 0 + printf("%s(%d): Direct I/O read returning EFAULT " + "uio = %p, zfs_uio_offset(uio) = %lu " + "zfs_uio_resid(uio) = %lu\n", + __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio), + zfs_uio_resid(&uio)); +#endif + } + +#endif + return (error); } #ifndef _SYS_SYSPROTO_H_ diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index ddb20b031448..c3be4730d4b6 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -922,6 +922,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) if (commit) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); + return (error); } diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c index 5898789ad53d..1cb0aeebcb67 100644 --- a/module/os/linux/spl/spl-condvar.c +++ b/module/os/linux/spl/spl-condvar.c @@ -31,10 +31,7 @@ #include #include - -#ifdef HAVE_SCHED_SIGNAL_HEADER #include -#endif #define MAX_HRTIMEOUT_SLACK_US 1000 static unsigned int spl_schedule_hrtimeout_slack_us = 0; @@ -209,48 +206,6 @@ __cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp) } EXPORT_SYMBOL(__cv_wait_idle); -#if defined(HAVE_IO_SCHEDULE_TIMEOUT) -#define spl_io_schedule_timeout(t) io_schedule_timeout(t) -#else - -struct spl_task_timer { - struct timer_list timer; - struct task_struct *task; -}; - -static void -__cv_wakeup(spl_timer_list_t t) -{ - struct timer_list *tmr = (struct timer_list *)t; - struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer); - - wake_up_process(task_timer->task); -} - -static long -spl_io_schedule_timeout(long time_left) -{ - long expire_time = jiffies + time_left; - struct spl_task_timer task_timer; - struct timer_list *timer = &task_timer.timer; - - task_timer.task = current; - - timer_setup(timer, __cv_wakeup, 0); - - timer->expires = expire_time; - add_timer(timer); - - io_schedule(); - - del_timer_sync(timer); - - time_left 
= expire_time - jiffies; - - return (time_left < 0 ? 0 : time_left); -} -#endif - /* * 'expire_time' argument is an absolute wall clock time in jiffies. * Return value is time left (expire_time - now) or -1 if timeout occurred. @@ -290,7 +245,7 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, */ mutex_exit(mp); if (io) - time_left = spl_io_schedule_timeout(time_left); + time_left = io_schedule_timeout(time_left); else time_left = schedule_timeout(time_left); diff --git a/module/os/linux/spl/spl-cred.c b/module/os/linux/spl/spl-cred.c index d407fc66b2de..7254df6bf294 100644 --- a/module/os/linux/spl/spl-cred.c +++ b/module/os/linux/spl/spl-cred.c @@ -74,26 +74,13 @@ crgetngroups(const cred_t *cr) gi = cr->group_info; rc = gi->ngroups; -#ifndef HAVE_GROUP_INFO_GID - /* - * For Linux <= 4.8, - * crgetgroups will only returns gi->blocks[0], which contains only - * the first NGROUPS_PER_BLOCK groups. - */ - if (rc > NGROUPS_PER_BLOCK) { - WARN_ON_ONCE(1); - rc = NGROUPS_PER_BLOCK; - } -#endif + return (rc); } /* * Return an array of supplemental gids. The returned address is safe * to use as long as the caller has taken a reference with crhold(). - * - * Linux 4.9 API change, group_info changed from 2d array via ->blocks to 1d - * array via ->gid. 
*/ gid_t * crgetgroups(const cred_t *cr) @@ -102,12 +89,8 @@ crgetgroups(const cred_t *cr) gid_t *gids = NULL; gi = cr->group_info; -#ifdef HAVE_GROUP_INFO_GID gids = KGIDP_TO_SGIDP(gi->gid); -#else - if (gi->nblocks > 0) - gids = KGIDP_TO_SGIDP(gi->blocks[0]); -#endif + return (gids); } diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 6ee0236d289a..6a95d77ac278 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -623,26 +623,6 @@ ddi_copyout(const void *from, void *to, size_t len, int flags) } EXPORT_SYMBOL(ddi_copyout); -static ssize_t -spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(file, buf, count, pos)); -#else - mm_segment_t saved_fs; - ssize_t ret; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - ret = vfs_read(file, (void __user *)buf, count, pos); - - set_fs(saved_fs); - - return (ret); -#endif -} - static int spl_getattr(struct file *filp, struct kstat *stat) { @@ -651,16 +631,8 @@ spl_getattr(struct file *filp, struct kstat *stat) ASSERT(filp); ASSERT(stat); -#if defined(HAVE_4ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&filp->f_path, stat); -#elif defined(HAVE_3ARGS_VFS_GETATTR) - rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, stat); -#else -#error "No available vfs_getattr()" -#endif if (rc) return (-rc); @@ -738,7 +710,7 @@ hostid_read(uint32_t *hostid) * Read directly into the variable like eglibc does. * Short reads are okay; native behavior is preserved. 
*/ - error = spl_kernel_read(filp, &value, sizeof (value), &off); + error = kernel_read(filp, &value, sizeof (value), &off); if (error < 0) { filp_close(filp, 0); return (EIO); diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index 16412bc9e6cf..7e806bd5699c 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -23,7 +23,6 @@ #define SPL_KMEM_CACHE_IMPLEMENTING -#include #include #include #include @@ -728,8 +727,7 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, skc->skc_obj_emergency = 0; skc->skc_obj_emergency_max = 0; - rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0, - GFP_KERNEL); + rc = percpu_counter_init(&skc->skc_linux_alloc, 0, GFP_KERNEL); if (rc != 0) { kfree(skc); return (NULL); @@ -788,25 +786,8 @@ spl_kmem_cache_create(const char *name, size_t size, size_t align, if (skc->skc_flags & KMC_RECLAIMABLE) slabflags |= SLAB_RECLAIM_ACCOUNT; -#if defined(SLAB_USERCOPY) - /* - * Required for PAX-enabled kernels if the slab is to be - * used for copying between user and kernel space. - */ - slabflags |= SLAB_USERCOPY; -#endif - -#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY) - /* - * Newer grsec patchset uses kmem_cache_create_usercopy() - * instead of SLAB_USERCOPY flag - */ skc->skc_linux_cache = kmem_cache_create_usercopy( skc->skc_name, size, align, slabflags, 0, size, NULL); -#else - skc->skc_linux_cache = kmem_cache_create( - skc->skc_name, size, align, slabflags, NULL); -#endif if (skc->skc_linux_cache == NULL) goto out; } @@ -1024,7 +1005,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) * then return so the local magazine can be rechecked for new objects. */ if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { - rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, + rc = wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, TASK_UNINTERRUPTIBLE); return (rc ? 
rc : -EAGAIN); } diff --git a/module/os/linux/spl/spl-kmem.c b/module/os/linux/spl/spl-kmem.c index d85e2a7daa8d..cae304d33bc3 100644 --- a/module/os/linux/spl/spl-kmem.c +++ b/module/os/linux/spl/spl-kmem.c @@ -134,7 +134,6 @@ EXPORT_SYMBOL(kmem_strfree); void * spl_kvmalloc(size_t size, gfp_t lflags) { -#ifdef HAVE_KVMALLOC /* * GFP_KERNEL allocations can safely use kvmalloc which may * improve performance by avoiding a) high latency caused by @@ -146,7 +145,6 @@ spl_kvmalloc(size_t size, gfp_t lflags) */ if ((lflags & GFP_KERNEL) == GFP_KERNEL) return (kvmalloc(size, lflags)); -#endif gfp_t kmalloc_lflags = lflags; diff --git a/module/os/linux/spl/spl-shrinker.c b/module/os/linux/spl/spl-shrinker.c index d5c8da471cbb..ff1c196d09f6 100644 --- a/module/os/linux/spl/spl-shrinker.c +++ b/module/os/linux/spl/spl-shrinker.c @@ -26,25 +26,6 @@ #include #include -#ifdef HAVE_SINGLE_SHRINKER_CALLBACK -/* 3.0-3.11: single shrink() callback, which we wrap to carry both functions */ -struct spl_shrinker_wrap { - struct shrinker shrinker; - spl_shrinker_cb countfunc; - spl_shrinker_cb scanfunc; -}; - -static int -spl_shrinker_single_cb(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct spl_shrinker_wrap *sw = (struct spl_shrinker_wrap *)shrinker; - - if (sc->nr_to_scan != 0) - (void) sw->scanfunc(&sw->shrinker, sc); - return (sw->countfunc(&sw->shrinker, sc)); -} -#endif - struct shrinker * spl_register_shrinker(const char *name, spl_shrinker_cb countfunc, spl_shrinker_cb scanfunc, int seek_cost) @@ -52,34 +33,20 @@ spl_register_shrinker(const char *name, spl_shrinker_cb countfunc, struct shrinker *shrinker; /* allocate shrinker */ -#if defined(HAVE_SHRINKER_REGISTER) +#ifdef HAVE_SHRINKER_REGISTER /* 6.7: kernel will allocate the shrinker for us */ shrinker = shrinker_alloc(0, name); -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) - /* 3.12-6.6: we allocate the shrinker */ - shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP); -#elif 
defined(HAVE_SINGLE_SHRINKER_CALLBACK) - /* 3.0-3.11: allocate a wrapper */ - struct spl_shrinker_wrap *sw = - kmem_zalloc(sizeof (struct spl_shrinker_wrap), KM_SLEEP); - shrinker = &sw->shrinker; #else - /* 2.x-2.6.22, or a newer shrinker API has been introduced. */ -#error "Unknown shrinker API" + /* 4.4-6.6: we allocate the shrinker */ + shrinker = kmem_zalloc(sizeof (struct shrinker), KM_SLEEP); #endif if (shrinker == NULL) return (NULL); /* set callbacks */ -#ifdef HAVE_SINGLE_SHRINKER_CALLBACK - sw->countfunc = countfunc; - sw->scanfunc = scanfunc; - shrinker->shrink = spl_shrinker_single_cb; -#else shrinker->count_objects = countfunc; shrinker->scan_objects = scanfunc; -#endif /* set params */ shrinker->seeks = seek_cost; @@ -102,14 +69,9 @@ spl_unregister_shrinker(struct shrinker *shrinker) { #if defined(HAVE_SHRINKER_REGISTER) shrinker_free(shrinker); -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) +#else unregister_shrinker(shrinker); kmem_free(shrinker, sizeof (struct shrinker)); -#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) - unregister_shrinker(shrinker); - kmem_free(shrinker, sizeof (struct spl_shrinker_wrap)); -#else -#error "Unknown shrinker API" #endif } EXPORT_SYMBOL(spl_unregister_shrinker); diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index c16bc9bc6409..7f4cab5da114 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -35,9 +35,7 @@ #include #include #include -#ifdef HAVE_CPU_HOTPLUG #include -#endif typedef struct taskq_kstats { /* static values, for completeness */ @@ -156,10 +154,8 @@ EXPORT_SYMBOL(system_delay_taskq); static taskq_t *dynamic_taskq; static taskq_thread_t *taskq_thread_create(taskq_t *); -#ifdef HAVE_CPU_HOTPLUG /* Multi-callback id for cpu hotplugging. 
*/ static int spl_taskq_cpuhp_state; -#endif /* List of all taskqs */ LIST_HEAD(tq_list); @@ -351,7 +347,7 @@ task_expire_impl(taskq_ent_t *t) } static void -task_expire(spl_timer_list_t tl) +task_expire(struct timer_list *tl) { struct timer_list *tmr = (struct timer_list *)tl; taskq_ent_t *t = from_timer(t, tmr, tqent_timer); @@ -1349,7 +1345,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri, return (NULL); tq->tq_hp_support = B_FALSE; -#ifdef HAVE_CPU_HOTPLUG + if (flags & TASKQ_THREADS_CPU_PCT) { tq->tq_hp_support = B_TRUE; if (cpuhp_state_add_instance_nocalls(spl_taskq_cpuhp_state, @@ -1358,7 +1354,6 @@ taskq_create(const char *name, int threads_arg, pri_t pri, return (NULL); } } -#endif spin_lock_init(&tq->tq_lock); INIT_LIST_HEAD(&tq->tq_thread_list); @@ -1447,12 +1442,11 @@ taskq_destroy(taskq_t *tq) tq->tq_flags &= ~TASKQ_ACTIVE; spin_unlock_irqrestore(&tq->tq_lock, flags); -#ifdef HAVE_CPU_HOTPLUG if (tq->tq_hp_support) { VERIFY0(cpuhp_state_remove_instance_nocalls( spl_taskq_cpuhp_state, &tq->tq_hp_cb_node)); } -#endif + /* * When TASKQ_ACTIVE is clear new tasks may not be added nor may * new worker threads be spawned for dynamic taskq. @@ -1709,7 +1703,6 @@ module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint, MODULE_PARM_DESC(spl_taskq_kick, "Write nonzero to kick stuck taskqs to spawn more threads"); -#ifdef HAVE_CPU_HOTPLUG /* * This callback will be called exactly once for each core that comes online, * for each dynamic taskq. 
We attempt to expand taskqs that have @@ -1787,7 +1780,6 @@ spl_taskq_prepare_down(unsigned int cpu, struct hlist_node *node) spin_unlock_irqrestore(&tq->tq_lock, flags); return (0); } -#endif int spl_taskq_init(void) @@ -1795,10 +1787,8 @@ spl_taskq_init(void) init_rwsem(&tq_list_sem); tsd_create(&taskq_tsd, NULL); -#ifdef HAVE_CPU_HOTPLUG spl_taskq_cpuhp_state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "fs/spl_taskq:online", spl_taskq_expand, spl_taskq_prepare_down); -#endif system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64), maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); @@ -1808,9 +1798,7 @@ spl_taskq_init(void) system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4), maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); if (system_delay_taskq == NULL) { -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); -#endif taskq_destroy(system_taskq); return (-ENOMEM); } @@ -1818,9 +1806,7 @@ spl_taskq_init(void) dynamic_taskq = taskq_create("spl_dynamic_taskq", 1, maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE); if (dynamic_taskq == NULL) { -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); -#endif taskq_destroy(system_taskq); taskq_destroy(system_delay_taskq); return (-ENOMEM); @@ -1854,8 +1840,6 @@ spl_taskq_fini(void) tsd_destroy(&taskq_tsd); -#ifdef HAVE_CPU_HOTPLUG cpuhp_remove_multi_state(spl_taskq_cpuhp_state); spl_taskq_cpuhp_state = 0; -#endif } diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 2af766ac2049..80acd0201b3b 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -175,17 +175,9 @@ issig(void) #else if (dequeue_signal(task, &set, &__info) != 0) { #endif -#ifdef HAVE_SIGNAL_STOP spin_unlock_irq(&task->sighand->siglock); kernel_signal_stop(); -#else - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - spl_set_special_state(TASK_STOPPED); - - 
spin_unlock_irq(&current->sighand->siglock); - schedule(); -#endif /* * Dequeued SIGSTOP/SIGTSTP. * Check if process has other singal pending. diff --git a/module/os/linux/spl/spl-vmem.c b/module/os/linux/spl/spl-vmem.c index cab3e9549cfe..7e2402477705 100644 --- a/module/os/linux/spl/spl-vmem.c +++ b/module/os/linux/spl/spl-vmem.c @@ -21,7 +21,6 @@ * with the SPL. If not, see . */ -#include #include #include #include diff --git a/module/os/linux/spl/spl-zlib.c b/module/os/linux/spl/spl-zlib.c index a7b6c14ee150..68078ac32bc5 100644 --- a/module/os/linux/spl/spl-zlib.c +++ b/module/os/linux/spl/spl-zlib.c @@ -53,7 +53,6 @@ */ -#include #include #include #include diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c index d0d0cca154a7..58b5e0dc44b7 100644 --- a/module/os/linux/spl/spl-zone.c +++ b/module/os/linux/spl/spl-zone.c @@ -54,7 +54,7 @@ typedef struct zone_dataset { char zd_dsname[]; /* name of the member dataset */ } zone_dataset_t; -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS /* * Returns: * - 0 on success @@ -95,18 +95,14 @@ user_ns_get(int fd, struct user_namespace **userns) return (error); } -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ static unsigned int user_ns_zoneid(struct user_namespace *user_ns) { unsigned int r; -#if defined(HAVE_USER_NS_COMMON_INUM) r = user_ns->ns.inum; -#else - r = user_ns->proc_inum; -#endif return (r); } @@ -123,7 +119,7 @@ zone_datasets_lookup(unsigned int nsinum) return (NULL); } -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS static struct zone_dataset * zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) { @@ -148,7 +144,7 @@ zone_dataset_cred_check(cred_t *cred) return (0); } -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ static int zone_dataset_name_check(const char *dataset,
size_t *dsnamelen) @@ -168,7 +164,7 @@ zone_dataset_name_check(const char *dataset, size_t *dsnamelen) int zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) { -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; @@ -213,14 +209,14 @@ zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) return (0); #else return (ENXIO); -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ } EXPORT_SYMBOL(zone_dataset_attach); int zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) { -#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +#ifdef CONFIG_USER_NS struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; @@ -262,7 +258,7 @@ zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) return (0); #else return (ENXIO); -#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +#endif /* CONFIG_USER_NS */ } EXPORT_SYMBOL(zone_dataset_detach); diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 60287ccdda98..dae4107e032c 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -186,6 +186,7 @@ static int zfs_abd_scatter_min_size = 512 * 3; abd_t *abd_zero_scatter = NULL; struct page; + /* * abd_zero_page is assigned to each of the pages of abd_zero_scatter. 
It will * point to ZERO_PAGE if it is available or it will be an allocated zero'd @@ -453,14 +454,21 @@ abd_free_chunks(abd_t *abd) if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK) ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); - abd_for_each_sg(abd, sg, nr_pages, i) { - page = sg_page(sg); - abd_unmark_zfs_page(page); - order = compound_order(page); - __free_pages(page, order); - ASSERT3U(sg->length, <=, PAGE_SIZE << order); - ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); + /* + * Scatter ABDs may be constructed by abd_alloc_from_pages() from + * an array of pages. In which case they should not be freed. + */ + if (!abd_is_from_pages(abd)) { + abd_for_each_sg(abd, sg, nr_pages, i) { + page = sg_page(sg); + abd_unmark_zfs_page(page); + order = compound_order(page); + __free_pages(page, order); + ASSERT3U(sg->length, <=, PAGE_SIZE << order); + ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); + } } + abd_free_sg_table(abd); } @@ -551,17 +559,19 @@ abd_update_linear_stats(abd_t *abd, abd_stats_op_t op) void abd_verify_scatter(abd_t *abd) { - size_t n; - int i = 0; - struct scatterlist *sg = NULL; - ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0); ASSERT3U(ABD_SCATTER(abd).abd_offset, <, ABD_SCATTER(abd).abd_sgl->length); - n = ABD_SCATTER(abd).abd_nents; + +#ifdef ZFS_DEBUG + struct scatterlist *sg = NULL; + size_t n = ABD_SCATTER(abd).abd_nents; + int i = 0; + abd_for_each_sg(abd, sg, n, i) { ASSERT3P(sg_page(sg), !=, NULL); } +#endif } static void @@ -687,14 +697,77 @@ abd_free_linear_page(abd_t *abd) { /* Transform it back into a scatter ABD for freeing */ struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; + + /* When backed by user page unmap it */ + if (abd_is_from_pages(abd)) + zfs_kunmap(sg_page(sg)); + abd->abd_flags &= ~ABD_FLAG_LINEAR; abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; ABD_SCATTER(abd).abd_nents = 1; ABD_SCATTER(abd).abd_offset = 0; ABD_SCATTER(abd).abd_sgl = sg; abd_free_chunks(abd); +} + +/* + * Allocate a scatter ABD structure from user pages. 
The pages must be + * pinned with get_user_pages, or similar, but need not be mapped via + * the kmap interfaces. + */ +abd_t * +abd_alloc_from_pages(struct page **pages, unsigned long offset, uint64_t size) +{ + uint_t npages = DIV_ROUND_UP(size, PAGE_SIZE); + struct sg_table table; + + VERIFY3U(size, <=, DMU_MAX_ACCESS); + ASSERT3U(offset, <, PAGE_SIZE); + ASSERT3P(pages, !=, NULL); + + /* + * Even if this buf is filesystem metadata, we only track that we + * own the underlying data buffer, which is not true in this case. + * Therefore, we don't ever use ABD_FLAG_META here. + */ + abd_t *abd = abd_alloc_struct(0); + abd->abd_flags |= ABD_FLAG_FROM_PAGES | ABD_FLAG_OWNER; + abd->abd_size = size; + + while (sg_alloc_table_from_pages(&table, pages, npages, offset, + size, __GFP_NOWARN | GFP_NOIO) != 0) { + ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); + schedule_timeout_interruptible(1); + } + + if ((offset + size) <= PAGE_SIZE) { + /* + * Since there is only one entry, this ABD can be represented + * as a linear buffer. All single-page (4K) ABD's constructed + * from a user page can be represented this way as long as the + * page is mapped to a virtual address. This allows us to + * apply an offset in to the mapped page. + * + * Note that kmap() must be used, not kmap_atomic(), because + * the mapping needs to be set up on all CPUs. Using kmap() + * also enables the use of highmem pages when required.
+ */ + abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_LINEAR_PAGE; + abd->abd_u.abd_linear.abd_sgl = table.sgl; + zfs_kmap(sg_page(table.sgl)); + ABD_LINEAR_BUF(abd) = sg_virt(table.sgl); + } else { + ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); + abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; + + ABD_SCATTER(abd).abd_offset = offset; + ABD_SCATTER(abd).abd_sgl = table.sgl; + ABD_SCATTER(abd).abd_nents = table.nents; + + ASSERT0(ABD_SCATTER(abd).abd_offset); + } - abd_update_scatter_stats(abd, ABDSTAT_DECR); + return (abd); } /* @@ -746,6 +819,9 @@ abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off, ABD_SCATTER(abd).abd_offset = new_offset; ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i; + if (abd_is_from_pages(sabd)) + abd->abd_flags |= ABD_FLAG_FROM_PAGES; + return (abd); } @@ -873,6 +949,115 @@ abd_cache_reap_now(void) { } +/* + * Borrow a raw buffer from an ABD without copying the contents of the ABD + * into the buffer. If the ABD is scattered, this will allocate a raw buffer + * whose contents are undefined. To copy over the existing data in the ABD, use + * abd_borrow_buf_copy() instead. + */ +void * +abd_borrow_buf(abd_t *abd, size_t n) +{ + void *buf; + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, 0); + /* + * In the event the ABD is composed of a single user page from Direct + * I/O we can not directly return the raw buffer. This is a consequence + * of not being able to write protect the page and the contents of the + * page can be changed at any time by the user.
+ */ + if (abd_is_from_pages(abd)) { + buf = zio_buf_alloc(n); + } else if (abd_is_linear(abd)) { + buf = abd_to_buf(abd); + } else { + buf = zio_buf_alloc(n); + } + +#ifdef ZFS_DEBUG + (void) zfs_refcount_add_many(&abd->abd_children, n, buf); +#endif + return (buf); +} + +void * +abd_borrow_buf_copy(abd_t *abd, size_t n) +{ + void *buf = abd_borrow_buf(abd, n); + + /* + * In the event the ABD is composed of a single user page from Direct + * I/O we must make sure to copy the data over into the newly allocated + * buffer. This is a consequence of the fact that we can not write + * protect the user page and there is a risk the contents of the page + * could be changed by the user at any moment. + */ + if (!abd_is_linear(abd) || abd_is_from_pages(abd)) { + abd_copy_to_buf(buf, abd, n); + } + return (buf); +} + +/* + * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will + * not change the contents of the ABD. If you want any changes you made to + * buf to be copied back to abd, use abd_return_buf_copy() instead. If the + * ABD is not constructed from user pages for Direct I/O then an ASSERT + * checks to make sure the contents of buffer have not changed since it was + * borrowed. We can not ASSERT that the contents of the buffer have not changed + * if it is composed of user pages because the pages can not be placed under + * write protection and the user could have possibly changed the contents in + * the pages at any time. + */ +void +abd_return_buf(abd_t *abd, void *buf, size_t n) +{ + abd_verify(abd); + ASSERT3U(abd->abd_size, >=, n); +#ifdef ZFS_DEBUG + (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); +#endif + if (abd_is_from_pages(abd)) { + zio_buf_free(buf, n); + } else if (abd_is_linear(abd)) { + ASSERT3P(buf, ==, abd_to_buf(abd)); + } else if (abd_is_gang(abd)) { +#ifdef ZFS_DEBUG + /* + * We have to be careful with gang ABD's that we do not ASSERT0 + * for any ABD's that contain user pages from Direct I/O.
In + * order to handle this, we just iterate through the gang ABD + * and only verify ABDs that are not from user pages. + */ + void *cmp_buf = buf; + + for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain); + cabd != NULL; + cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { + if (!abd_is_from_pages(cabd)) { + ASSERT0(abd_cmp_buf(cabd, cmp_buf, + cabd->abd_size)); + } + cmp_buf = (char *)cmp_buf + cabd->abd_size; + } +#endif + zio_buf_free(buf, n); + } else { + ASSERT0(abd_cmp_buf(abd, buf, n)); + zio_buf_free(buf, n); + } +} + +void +abd_return_buf_copy(abd_t *abd, void *buf, size_t n) +{ + if (!abd_is_linear(abd) || abd_is_from_pages(abd)) { + abd_copy_from_buf(abd, buf, n); + } + abd_return_buf(abd, buf, n); +} + /* * This is abd_iter_page(), the function underneath abd_iterate_page_func(). * It yields the next page struct and data offset and size within it, without diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5d1b4383412a..d21bc667ba69 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -252,22 +252,13 @@ secpolicy_zfs(const cred_t *cr) * Equivalent to secpolicy_zfs(), but works even if the cred_t is not that of * the current process. Takes both cred_t and proc_t so that this can work * easily on all platforms. - * - * The has_capability() function was first exported in the 4.10 Linux kernel - * then backported to some LTS kernels. Prior to this change there was no - * mechanism to perform this check therefore EACCES is returned when the - * functionality is not present in the kernel. 
*/ int secpolicy_zfs_proc(const cred_t *cr, proc_t *proc) { -#if defined(HAVE_HAS_CAPABILITY) if (!has_capability(proc, CAP_SYS_ADMIN)) return (EACCES); return (0); -#else - return (EACCES); -#endif } void diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index e69c5f3841ec..a6271d3a7df1 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -38,9 +38,7 @@ #include #include #include -#ifdef HAVE_LINUX_BLK_CGROUP_HEADER #include -#endif /* * Linux 6.8.x uses a bdev_handle as an instance/refcount for an underlying @@ -481,16 +479,6 @@ vdev_disk_close(vdev_t *v) v->vdev_tsd = NULL; } -static inline void -vdev_submit_bio_impl(struct bio *bio) -{ -#ifdef HAVE_1ARG_SUBMIT_BIO - (void) submit_bio(bio); -#else - (void) submit_bio(bio_data_dir(bio), bio); -#endif -} - /* * preempt_schedule_notrace is GPL-only which breaks the ZFS build, so * replace it with preempt_schedule under the following condition: @@ -508,7 +496,6 @@ vdev_submit_bio_impl(struct bio *bio) */ #if !defined(HAVE_BIO_ALLOC_4ARG) -#ifdef HAVE_BIO_SET_DEV #if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY) /* * The Linux 5.5 kernel updated percpu_ref_tryget() which is inlined by @@ -594,16 +581,6 @@ vdev_bio_set_dev(struct bio *bio, struct block_device *bdev) #define bio_set_dev vdev_bio_set_dev #endif #endif -#else -/* - * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels. 
- */ -static inline void -bio_set_dev(struct bio *bio, struct block_device *bdev) -{ - bio->bi_bdev = bdev; -} -#endif /* HAVE_BIO_SET_DEV */ #endif /* !HAVE_BIO_ALLOC_4ARG */ static inline void @@ -611,7 +588,7 @@ vdev_submit_bio(struct bio *bio) { struct bio_list *bio_list = current->bio_list; current->bio_list = NULL; - vdev_submit_bio_impl(bio); + (void) submit_bio(bio); current->bio_list = bio_list; } @@ -709,7 +686,7 @@ vbio_alloc(zio_t *zio, struct block_device *bdev, int flags) return (vbio); } -BIO_END_IO_PROTO(vbio_completion, bio, error); +static void vbio_completion(struct bio *bio); static int vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset) @@ -805,7 +782,8 @@ vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size) } /* IO completion callback */ -BIO_END_IO_PROTO(vbio_completion, bio, error) +static void +vbio_completion(struct bio *bio) { vbio_t *vbio = bio->bi_private; zio_t *zio = vbio->vbio_zio; @@ -813,15 +791,7 @@ BIO_END_IO_PROTO(vbio_completion, bio, error) ASSERT(zio); /* Capture and log any errors */ -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = 0; - if (error) - zio->io_error = -(error); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - zio->io_error = EIO; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); ASSERT3U(zio->io_error, >=, 0); if (zio->io_error) @@ -1072,19 +1042,13 @@ vdev_classic_dio_put(dio_request_t *dr) } } -BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error) +static void +vdev_classic_physio_completion(struct bio *bio) { dio_request_t *dr = bio->bi_private; if (dr->dr_error == 0) { -#ifdef HAVE_1ARG_BIO_END_IO_T - dr->dr_error = BIO_END_IO_ERROR(bio); -#else - if (error) - dr->dr_error = -(error); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - dr->dr_error = EIO; -#endif + dr->dr_error = bi_status_to_errno(bio->bi_status); } /* Drop reference acquired by vdev_classic_physio */ @@ -1223,14 +1187,11 @@ 
vdev_classic_physio(zio_t *zio) /* ========== */ -BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error) +static void +vdev_disk_io_flush_completion(struct bio *bio) { zio_t *zio = bio->bi_private; -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = -error; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); if (zio->io_error && (zio->io_error == EOPNOTSUPP)) zio->io_vd->vdev_nowritecache = B_TRUE; @@ -1265,14 +1226,12 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) return (0); } -BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error) +static void +vdev_disk_discard_end_io(struct bio *bio) { zio_t *zio = bio->bi_private; -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = -error; -#endif + zio->io_error = bi_status_to_errno(bio->bi_status); + bio_put(bio); if (zio->io_error) vdev_disk_error(zio); diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index e042116333fb..a8b25b2bd8a5 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -500,9 +500,6 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_atime_dirty = B_FALSE; zp->z_zn_prefetch = B_FALSE; zp->z_is_sa = B_FALSE; -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - zp->z_is_mapped = B_FALSE; -#endif zp->z_is_ctldir = B_TRUE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c index bc753614be27..1b52dbe4f365 100644 --- a/module/os/linux/zfs/zfs_file_os.c +++ b/module/os/linux/zfs/zfs_file_os.c @@ -69,26 +69,6 @@ zfs_file_close(zfs_file_t *fp) filp_close(fp, 0); } -static ssize_t -zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off) -{ -#if defined(HAVE_KERNEL_WRITE_PPOS) - return (kernel_write(fp, buf, count, off)); -#else - mm_segment_t saved_fs; - ssize_t rc; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = vfs_write(fp, 
(__force const char __user __user *)buf, count, off); - - set_fs(saved_fs); - - return (rc); -#endif -} - /* * Stateful write - use os internal file pointer to determine where to * write and update on successful completion. @@ -106,7 +86,7 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) loff_t off = fp->f_pos; ssize_t rc; - rc = zfs_file_write_impl(fp, buf, count, &off); + rc = kernel_write(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -138,7 +118,7 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, { ssize_t rc; - rc = zfs_file_write_impl(fp, buf, count, &off); + rc = kernel_write(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -151,25 +131,6 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off, return (0); } -static ssize_t -zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off) -{ -#if defined(HAVE_KERNEL_READ_PPOS) - return (kernel_read(fp, buf, count, off)); -#else - mm_segment_t saved_fs; - ssize_t rc; - - saved_fs = get_fs(); - set_fs(KERNEL_DS); - - rc = vfs_read(fp, (void __user *)buf, count, off); - set_fs(saved_fs); - - return (rc); -#endif -} - /* * Stateful read - use os internal file pointer to determine where to * read and update on successful completion. 
@@ -187,7 +148,7 @@ zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) loff_t off = fp->f_pos; ssize_t rc; - rc = zfs_file_read_impl(fp, buf, count, &off); + rc = kernel_read(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -219,7 +180,7 @@ zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, { ssize_t rc; - rc = zfs_file_read_impl(fp, buf, count, &off); + rc = kernel_read(fp, buf, count, &off); if (rc < 0) return (-rc); @@ -274,16 +235,8 @@ zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr) struct kstat stat; int rc; -#if defined(HAVE_4ARGS_VFS_GETATTR) rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); -#elif defined(HAVE_2ARGS_VFS_GETATTR) - rc = vfs_getattr(&filp->f_path, &stat); -#elif defined(HAVE_3ARGS_VFS_GETATTR) - rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat); -#else -#error "No available vfs_getattr()" -#endif if (rc) return (-rc); diff --git a/module/os/linux/zfs/zfs_racct.c b/module/os/linux/zfs/zfs_racct.c index ce623ef9d185..ce197caa45f0 100644 --- a/module/os/linux/zfs/zfs_racct.c +++ b/module/os/linux/zfs/zfs_racct.c @@ -25,14 +25,35 @@ #include +#ifdef _KERNEL +#include + +void +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + task_io_account_read(size); + spa_iostats_read_add(spa, size, iops, flags); +} + void -zfs_racct_read(uint64_t size, uint64_t iops) +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { - (void) size, (void) iops; + task_io_account_write(size); + spa_iostats_write_add(spa, size, iops, flags); } +#else + void -zfs_racct_write(uint64_t size, uint64_t iops) +zfs_racct_read(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) { - (void) size, (void) iops; + (void) spa, (void) size, (void) iops, (void) flags; } + +void +zfs_racct_write(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + (void) spa, (void) size, (void) iops, (void) flags; +} + +#endif /* _KERNEL */ diff --git 
a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index a99a1ba88256..8fac61fd3178 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -41,12 +41,19 @@ #ifdef _KERNEL +#include +#include +#include #include #include #include #include +#include +#include #include #include +#include +#include /* * Move "n" bytes at byte address "p"; "rw" indicates the direction @@ -161,7 +168,6 @@ zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) return (0); } -#ifdef HAVE_BLK_MQ static void zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw, struct bio_vec *bv) @@ -253,17 +259,12 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) } return (0); } -#endif static int zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { -#ifdef HAVE_BLK_MQ if (uio->rq != NULL) return (zfs_uiomove_bvec_rq(p, n, rw, uio)); -#else - ASSERT3P(uio->rq, ==, NULL); -#endif return (zfs_uiomove_bvec_impl(p, n, rw, uio)); } @@ -327,8 +328,13 @@ EXPORT_SYMBOL(zfs_uiomove); int zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio) { - if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) { - /* There's never a need to fault in kernel pages */ + if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC || + (uio->uio_extflg & UIO_DIRECT)) { + /* + * There's never a need to fault in kernel pages or Direct I/O + * write pages. Direct I/O write pages have been pinned in so + * there is never a time for these pages a fault will occur. + */ return (0); #if defined(HAVE_VFS_IOV_ITER) } else if (uio->uio_segflg == UIO_ITER) { @@ -437,9 +443,288 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) uio->uio_iovcnt--; } } + uio->uio_loffset += n; uio->uio_resid -= n; } EXPORT_SYMBOL(zfs_uioskip); +/* + * Check if the uio is page-aligned in memory. 
+ */ +boolean_t +zfs_uio_page_aligned(zfs_uio_t *uio) +{ + boolean_t aligned = B_TRUE; + + if (uio->uio_segflg == UIO_USERSPACE || + uio->uio_segflg == UIO_SYSSPACE) { + const struct iovec *iov = uio->uio_iov; + size_t skip = uio->uio_skip; + + for (int i = uio->uio_iovcnt; i > 0; iov++, i--) { + uintptr_t addr = (uintptr_t)(iov->iov_base + skip); + size_t size = iov->iov_len - skip; + if ((addr & (PAGE_SIZE - 1)) || + (size & (PAGE_SIZE - 1))) { + aligned = B_FALSE; + break; + } + skip = 0; + } +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + unsigned long alignment = + iov_iter_alignment(uio->uio_iter); + aligned = IS_P2ALIGNED(alignment, PAGE_SIZE); +#endif + } else { + /* Currently not supported */ + aligned = B_FALSE; + } + + return (aligned); +} + + +#if defined(HAVE_ZERO_PAGE_GPL_ONLY) || !defined(_LP64) +#define ZFS_MARKED_PAGE 0x0 +#define IS_ZFS_MARKED_PAGE(_p) 0 +#define zfs_mark_page(_p) +#define zfs_unmark_page(_p) +#define IS_ZERO_PAGE(_p) 0 + +#else +/* + * Mark pages to know if they were allocated to replace ZERO_PAGE() for + * Direct I/O writes.
+ */ +#define ZFS_MARKED_PAGE 0x5a465350414745 /* ASCII: ZFSPAGE */ +#define IS_ZFS_MARKED_PAGE(_p) \ + (page_private(_p) == (unsigned long)ZFS_MARKED_PAGE) +#define IS_ZERO_PAGE(_p) ((_p) == ZERO_PAGE(0)) + +static inline void +zfs_mark_page(struct page *page) +{ + ASSERT3P(page, !=, NULL); + get_page(page); + SetPagePrivate(page); + set_page_private(page, ZFS_MARKED_PAGE); +} + +static inline void +zfs_unmark_page(struct page *page) +{ + ASSERT3P(page, !=, NULL); + set_page_private(page, 0UL); + ClearPagePrivate(page); + put_page(page); +} +#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */ + +static void +zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) +{ + ASSERT3P(uio->uio_dio.pages, !=, NULL); + + for (long i = 0; i < uio->uio_dio.npages; i++) { + struct page *p = uio->uio_dio.pages[i]; + lock_page(p); + + if (IS_ZERO_PAGE(p)) { + /* + * If the user page is the kernel's ZERO_PAGE(), replace + * its pages[] slot with a newly allocated zero-filled + * page so the user can not change the contents while a + * Direct I/O write is taking place. + */ + gfp_t gfp_zero_page = __GFP_NOWARN | GFP_NOIO | + __GFP_ZERO | GFP_KERNEL; + + ASSERT0(IS_ZFS_MARKED_PAGE(p)); + unlock_page(p); + put_page(p); + p = __page_cache_alloc(gfp_zero_page); + uio->uio_dio.pages[i] = p; + zfs_mark_page(p); + } else { + unlock_page(p); + } + } +} + +void +zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3P(uio->uio_dio.pages, !=, NULL); + + for (long i = 0; i < uio->uio_dio.npages; i++) { + struct page *p = uio->uio_dio.pages[i]; + + if (IS_ZFS_MARKED_PAGE(p)) { + zfs_unmark_page(p); + __free_page(p); + continue; + } + + put_page(p); + } + + vmem_free(uio->uio_dio.pages, + uio->uio_dio.npages * sizeof (struct page *)); +} + +/* + * zfs_uio_iov_step() is just a modified version of the STEP function of Linux's + * iov_iter_get_pages().
+ */ +static int +zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio, + long *numpages) +{ + unsigned long addr = (unsigned long)(v.iov_base); + size_t len = v.iov_len; + unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE); + + long res = zfs_get_user_pages( + P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, rw == UIO_READ, + &uio->uio_dio.pages[uio->uio_dio.npages]); + if (res < 0) { + return (SET_ERROR(-res)); + } else if (len != (res * PAGE_SIZE)) { + return (SET_ERROR(EFAULT)); + } + + ASSERT3S(len, ==, res * PAGE_SIZE); + *numpages = res; + return (0); +} + +static int +zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + const struct iovec *iovp = uio->uio_iov; + size_t skip = uio->uio_skip; + size_t len = uio->uio_resid - skip; + + ASSERT(uio->uio_segflg != UIO_SYSSPACE); + + for (int i = 0; i < uio->uio_iovcnt; i++) { + struct iovec iov; + long numpages = 0; + + if (iovp->iov_len == 0) { + iovp++; + skip = 0; + continue; + } + iov.iov_len = MIN(len, iovp->iov_len - skip); + iov.iov_base = iovp->iov_base + skip; + int error = zfs_uio_iov_step(iov, rw, uio, &numpages); + + if (error) + return (error); + + uio->uio_dio.npages += numpages; + len -= iov.iov_len; + skip = 0; + iovp++; + } + + ASSERT0(len); + + return (0); +} + +#if defined(HAVE_VFS_IOV_ITER) +static int +zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + size_t skip = uio->uio_skip; + size_t wanted = uio->uio_resid - uio->uio_skip; + ssize_t rollback = 0; + ssize_t cnt; + unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE); + + while (wanted) { +#if defined(HAVE_IOV_ITER_GET_PAGES2) + cnt = iov_iter_get_pages2(uio->uio_iter, + &uio->uio_dio.pages[uio->uio_dio.npages], + wanted, maxpages, &skip); +#else + cnt = iov_iter_get_pages(uio->uio_iter, + &uio->uio_dio.pages[uio->uio_dio.npages], + wanted, maxpages, &skip); +#endif + if (cnt < 0) { + iov_iter_revert(uio->uio_iter, rollback); + return (SET_ERROR(-cnt)); + } + uio->uio_dio.npages += DIV_ROUND_UP(cnt, 
PAGE_SIZE); + rollback += cnt; + wanted -= cnt; + skip = 0; +#if !defined(HAVE_IOV_ITER_GET_PAGES2) + /* + * iov_iter_get_pages2() advances the iov_iter on success. + */ + iov_iter_advance(uio->uio_iter, cnt); +#endif + + } + ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip); + iov_iter_revert(uio->uio_iter, rollback); + + return (0); +} +#endif /* HAVE_VFS_IOV_ITER */ + +/* + * This function pins user pages. In the event that the user pages were not + * successfully pinned an error value is returned. + * + * On success, 0 is returned. + */ +int +zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) +{ + int error = 0; + long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE); + size_t size = npages * sizeof (struct page *); + + if (uio->uio_segflg == UIO_USERSPACE) { + uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); + error = zfs_uio_get_dio_pages_iov(uio, rw); +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); + error = zfs_uio_get_dio_pages_iov_iter(uio, rw); +#endif + } else { + return (SET_ERROR(EOPNOTSUPP)); + } + + ASSERT3S(uio->uio_dio.npages, >=, 0); + + if (error) { + for (long i = 0; i < uio->uio_dio.npages; i++) + put_page(uio->uio_dio.pages[i]); + vmem_free(uio->uio_dio.pages, size); + return (error); + } else { + ASSERT3S(uio->uio_dio.npages, ==, npages); + } + + if (rw == UIO_WRITE) { + zfs_uio_dio_check_for_zero_page(uio); + } + + uio->uio_extflg |= UIO_DIRECT; + + return (0); +} + #endif /* _KERNEL */ diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index a52f08868d96..b28efd3c58cd 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "zfs_comutil.h" enum { @@ -1177,64 +1178,6 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp) return (error); } -/* - * Linux kernels older than 3.1 do not support a per-filesystem shrinker. 
- * To accommodate this we must improvise and manually walk the list of znodes - * attempting to prune dentries in order to be able to drop the inodes. - * - * To avoid scanning the same znodes multiple times they are always rotated - * to the end of the z_all_znodes list. New znodes are inserted at the - * end of the list so we're always scanning the oldest znodes first. - */ -static int -zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) -{ - znode_t **zp_array, *zp; - int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *)); - int objects = 0; - int i = 0, j = 0; - - zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP); - - mutex_enter(&zfsvfs->z_znodes_lock); - while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) { - - if ((i++ > nr_to_scan) || (j >= max_array)) - break; - - ASSERT(list_link_active(&zp->z_link_node)); - list_remove(&zfsvfs->z_all_znodes, zp); - list_insert_tail(&zfsvfs->z_all_znodes, zp); - - /* Skip active znodes and .zfs entries */ - if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir) - continue; - - if (igrab(ZTOI(zp)) == NULL) - continue; - - zp_array[j] = zp; - j++; - } - mutex_exit(&zfsvfs->z_znodes_lock); - - for (i = 0; i < j; i++) { - zp = zp_array[i]; - - ASSERT3P(zp, !=, NULL); - d_prune_aliases(ZTOI(zp)); - - if (atomic_read(&ZTOI(zp)->i_count) == 1) - objects++; - - zrele(zp); - } - - vmem_free(zp_array, max_array * sizeof (znode_t *)); - - return (objects); -} - /* * The ARC has requested that the filesystem drop entries from the dentry * and inode caches. 
This can occur when the ARC needs to free meta data @@ -1260,9 +1203,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); -#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ - defined(SHRINK_CONTROL_HAS_NID) && \ - defined(SHRINKER_NUMA_AWARE) +#ifdef SHRINKER_NUMA_AWARE if (shrinker->flags & SHRINKER_NUMA_AWARE) { long tc = 1; for_each_online_node(sc.nid) { @@ -1284,27 +1225,8 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) } else { *objects = (*shrinker->scan_objects)(shrinker, &sc); } - -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) - *objects = (*shrinker->scan_objects)(shrinker, &sc); -#elif defined(HAVE_SINGLE_SHRINKER_CALLBACK) - *objects = (*shrinker->shrink)(shrinker, &sc); -#elif defined(HAVE_D_PRUNE_ALIASES) -#define D_PRUNE_ALIASES_IS_DEFAULT - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); #else -#error "No available dentry and inode cache pruning mechanism." -#endif - -#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT) -#undef D_PRUNE_ALIASES_IS_DEFAULT - /* - * Fall back to zfs_prune_aliases if the kernel's per-superblock - * shrinker couldn't free anything, possibly due to the inodes being - * allocated in a different memcg. 
- */ - if (*objects == 0) - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); + *objects = (*shrinker->scan_objects)(shrinker, &sc); #endif zfs_exit(zfsvfs, FTAG); @@ -1463,9 +1385,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) return (0); } -#if defined(HAVE_SUPER_SETUP_BDI_NAME) -atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); -#endif +static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) @@ -1526,7 +1446,8 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); - error = -zpl_bdi_setup(sb, "zfs"); + error = -super_setup_bdi_name(sb, "%.28s-%ld", "zfs", + atomic_long_inc_return(&zfs_bdi_seq)); if (error) goto out; @@ -1654,7 +1575,6 @@ zfs_umount(struct super_block *sb) arc_remove_prune_callback(zfsvfs->z_arc_prune); VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; - zpl_bdi_destroy(sb); /* * z_os will be NULL if there was an error in @@ -2105,9 +2025,6 @@ zfs_init(void) zfs_znode_init(); dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); register_filesystem(&zpl_fs_type); -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - register_fo_extend(&zpl_file_operations); -#endif } void @@ -2118,9 +2035,6 @@ zfs_fini(void) */ taskq_wait(system_delay_taskq); taskq_wait(system_taskq); -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - unregister_fo_extend(&zpl_file_operations); -#endif unregister_filesystem(&zpl_fs_type); zfs_znode_fini(); zfsctl_fini(); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 9803c7fecb5c..7d3a7b8d9d38 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -296,6 +296,7 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT); if (pp) { + /* * If filemap_fault() retries there exists a window * where the page will be unlocked and not up to date. 
@@ -1506,7 +1507,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, * we use the offset 2 for the '.zfs' directory. */ int -zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) +zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr) { (void) cr; znode_t *zp = ITOZ(ip); @@ -1612,7 +1613,7 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) type = ZFS_DIRENT_TYPE(zap.za_first_integer); } - done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name), + done = !dir_emit(ctx, zap.za_name, strlen(zap.za_name), objnum, type); if (done) break; @@ -3460,9 +3461,9 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr, boolean_t waited = B_FALSE; boolean_t is_tmpfile = 0; uint64_t txg; -#ifdef HAVE_TMPFILE + is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); -#endif + ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode)); if (name == NULL) @@ -3766,8 +3767,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, /* * Speed up any non-sync page writebacks since * they may take several seconds to complete. - * Refer to the comment in zpl_fsync() (when - * HAVE_FSYNC_RANGE is defined) for details. + * Refer to the comment in zpl_fsync() for details. */ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { zil_commit(zfsvfs->z_log, zp->z_id); @@ -3866,7 +3866,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, } zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit, - for_sync ? zfs_putpage_sync_commit_cb : + B_FALSE, for_sync ? 
zfs_putpage_sync_commit_cb : zfs_putpage_async_commit_cb, pp); dmu_tx_commit(tx); @@ -4009,6 +4009,7 @@ zfs_inactive(struct inode *ip) static int zfs_fillpage(struct inode *ip, struct page *pp) { + znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); loff_t i_size = i_size_read(ip); u_offset_t io_off = page_offset(pp); @@ -4020,7 +4021,7 @@ zfs_fillpage(struct inode *ip, struct page *pp) io_len = i_size - io_off; void *va = kmap(pp); - int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, + int error = dmu_read(zfsvfs->z_os, zp->z_id, io_off, io_len, va, DMU_READ_PREFETCH); if (io_len != PAGE_SIZE) memset((char *)va + io_len, 0, PAGE_SIZE - io_len); @@ -4058,11 +4059,49 @@ zfs_getpage(struct inode *ip, struct page *pp) zfsvfs_t *zfsvfs = ITOZSB(ip); znode_t *zp = ITOZ(ip); int error; + loff_t i_size = i_size_read(ip); + u_offset_t io_off = page_offset(pp); + size_t io_len = PAGE_SIZE; if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (error); + ASSERT3U(io_off, <, i_size); + + if (io_off + io_len > i_size) + io_len = i_size - io_off; + + /* + * It is important to hold the rangelock here because it is possible + * a Direct I/O write or block clone might be taking place at the same + * time that a page is being faulted in through filemap_fault(). With + * Direct I/O writes and block cloning db->db_data will be set to NULL + * with dbuf_clear_data() in dmu_buf_will_clone_or_dio(). If the + * rangelock is not held, then there is a race between faulting in a + * page and writing out a Direct I/O write or block cloning. Without + * the rangelock a NULL pointer dereference can occur in + * dmu_read_impl() for db->db_data during the memcpy operation when + * zfs_fillpage() calls dmu_read().
+ */ + zfs_locked_range_t *lr = zfs_rangelock_tryenter(&zp->z_rangelock, + io_off, io_len, RL_READER); + if (lr == NULL) { + /* + * It is important to drop the page lock before grabbing the + * rangelock to avoid another deadlock between here and + * zfs_write() -> update_pages(). update_pages() holds both the + * rangelock and the page lock. + */ + get_page(pp); + unlock_page(pp); + lr = zfs_rangelock_enter(&zp->z_rangelock, io_off, + io_len, RL_READER); + lock_page(pp); + put_page(pp); + } error = zfs_fillpage(ip, pp); + zfs_rangelock_exit(lr); + if (error == 0) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE); diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 265153e011e7..4d18187b715b 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -415,21 +415,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) switch (ip->i_mode & S_IFMT) { case S_IFREG: ip->i_op = &zpl_inode_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - ip->i_fop = &zpl_file_operations.kabi_fops; -#else ip->i_fop = &zpl_file_operations; -#endif ip->i_mapping->a_ops = &zpl_address_space_operations; break; case S_IFDIR: -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER - ip->i_flags |= S_IOPS_WRAPPER; - ip->i_op = &zpl_dir_inode_operations.ops; -#else ip->i_op = &zpl_dir_inode_operations; -#endif ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; @@ -459,11 +450,7 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) /* Assume the inode is a file and attempt to continue */ ip->i_mode = S_IFREG | 0644; ip->i_op = &zpl_inode_operations; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - ip->i_fop = &zpl_file_operations.kabi_fops; -#else ip->i_fop = &zpl_file_operations; -#endif ip->i_mapping->a_ops = &zpl_address_space_operations; break; } @@ -476,7 +463,6 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) * Linux and Solaris have different sets of file attributes, so we * restrict this 
conversion to the intersection of the two. */ -#ifdef HAVE_INODE_SET_FLAGS unsigned int flags = 0; if (zp->z_pflags & ZFS_IMMUTABLE) flags |= S_IMMUTABLE; @@ -484,17 +470,6 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) flags |= S_APPEND; inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND); -#else - if (zp->z_pflags & ZFS_IMMUTABLE) - ip->i_flags |= S_IMMUTABLE; - else - ip->i_flags &= ~S_IMMUTABLE; - - if (zp->z_pflags & ZFS_APPENDONLY) - ip->i_flags |= S_APPEND; - else - ip->i_flags &= ~S_APPEND; -#endif } /* @@ -560,9 +535,6 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_unlinked = B_FALSE; zp->z_atime_dirty = B_FALSE; -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - zp->z_is_mapped = B_FALSE; -#endif zp->z_is_ctldir = B_FALSE; zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; @@ -1391,12 +1363,6 @@ zfs_zinactive(znode_t *zp) zfs_znode_hold_exit(zfsvfs, zh); } -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zfs_compare_timespec timespec64_compare -#else -#define zfs_compare_timespec timespec_compare -#endif - /* * Determine whether the znode's atime must be updated. The logic mostly * duplicates the Linux kernel's relatime_need_update() functionality. @@ -1416,11 +1382,11 @@ zfs_relatime_need_update(const struct inode *ip) * has passed since the last update of atime. 
*/ tmp_ts = zpl_inode_get_mtime(ip); - if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) + if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); tmp_ts = zpl_inode_get_ctime(ip); - if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0) + if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) return (B_TRUE); if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60) diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c index 8ee7fcecc7b7..56a30be5110c 100644 --- a/module/os/linux/zfs/zpl_ctldir.c +++ b/module/os/linux/zfs/zpl_ctldir.c @@ -52,7 +52,7 @@ zpl_common_open(struct inode *ip, struct file *filp) * Get root directory contents. */ static int -zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_root_iterate(struct file *filp, struct dir_context *ctx) { zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); int error = 0; @@ -60,11 +60,11 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) if ((error = zpl_enter(zfsvfs, FTAG)) != 0) return (error); - if (!zpl_dir_emit_dots(filp, ctx)) + if (!dir_emit_dots(filp, ctx)) goto out; if (ctx->pos == 2) { - if (!zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME, + if (!dir_emit(ctx, ZFS_SNAPDIR_NAME, strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR)) goto out; @@ -72,7 +72,7 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) } if (ctx->pos == 3) { - if (!zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME, + if (!dir_emit(ctx, ZFS_SHAREDIR_NAME, strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR)) goto out; @@ -84,21 +84,6 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_root_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && 
!HAVE_VFS_ITERATE_SHARED */ - /* * Get root directory attributes. */ @@ -167,13 +152,7 @@ const struct file_operations zpl_fops_root = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_root_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_root_iterate, -#else - .readdir = zpl_root_readdir, -#endif }; const struct inode_operations zpl_ops_root = { @@ -207,11 +186,7 @@ zpl_snapdir_automount(struct path *path) * the snapshot being immediately unmounted. */ static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i) -#else zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) -#endif { return (!!dentry->d_inode); } @@ -258,7 +233,7 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, } static int -zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx) { zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); fstrans_cookie_t cookie; @@ -271,7 +246,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); cookie = spl_fstrans_mark(); - if (!zpl_dir_emit_dots(filp, ctx)) + if (!dir_emit_dots(filp, ctx)) goto out; /* Start the position at 0 if it already emitted . and .. 
*/ @@ -284,7 +259,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) if (error) goto out; - if (!zpl_dir_emit(ctx, snapname, strlen(snapname), + if (!dir_emit(ctx, snapname, strlen(snapname), ZFSCTL_INO_SHARES - id, DT_DIR)) goto out; @@ -300,21 +275,6 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_snapdir_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - static int #ifdef HAVE_IOPS_RENAME_USERNS zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip, @@ -478,13 +438,7 @@ const struct file_operations zpl_fops_snapdir = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_snapdir_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_snapdir_iterate, -#else - .readdir = zpl_snapdir_readdir, -#endif }; @@ -535,7 +489,7 @@ zpl_shares_lookup(struct inode *dip, struct dentry *dentry, } static int -zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_shares_iterate(struct file *filp, struct dir_context *ctx) { fstrans_cookie_t cookie; cred_t *cr = CRED(); @@ -548,7 +502,7 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) cookie = spl_fstrans_mark(); if (zfsvfs->z_shares_dir == 0) { - zpl_dir_emit_dots(filp, ctx); + dir_emit_dots(filp, ctx); goto out; } @@ -569,21 +523,6 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - 
ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_shares_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - static int #ifdef HAVE_USERNS_IOPS_GETATTR zpl_shares_getattr_impl(struct user_namespace *user_ns, @@ -654,14 +593,7 @@ const struct file_operations zpl_fops_shares = { .open = zpl_common_open, .llseek = generic_file_llseek, .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED .iterate_shared = zpl_shares_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_shares_iterate, -#else - .readdir = zpl_shares_readdir, -#endif - }; /* diff --git a/module/os/linux/zfs/zpl_export.c b/module/os/linux/zfs/zpl_export.c index aa80b72e2d7a..b6b9e2754055 100644 --- a/module/os/linux/zfs/zpl_export.c +++ b/module/os/linux/zfs/zpl_export.c @@ -31,15 +31,8 @@ static int -#ifdef HAVE_ENCODE_FH_WITH_INODE zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent) { -#else -zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) -{ - /* CSTYLED */ - struct inode *ip = dentry->d_inode; -#endif /* HAVE_ENCODE_FH_WITH_INODE */ fstrans_cookie_t cookie; ushort_t empty_fid = 0; fid_t *fid; diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 9dec52215c7c..4d1bf1d5477f 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -38,9 +38,7 @@ defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO) #include #endif -#ifdef HAVE_FILE_FADVISE #include -#endif #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO #include #endif @@ -93,7 +91,7 @@ zpl_release(struct inode *ip, struct file *filp) } static int -zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) +zpl_iterate(struct file *filp, struct dir_context *ctx) { cred_t *cr = CRED(); int error; @@ -109,62 +107,6 @@ zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) return (error); } -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) 
-static int -zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -#if defined(HAVE_FSYNC_WITHOUT_DENTRY) -/* - * Linux 2.6.35 - 3.0 API, - * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed - * redundant. The dentry is still accessible via filp->f_path.dentry, - * and we are guaranteed that filp will never be NULL. - */ -static int -zpl_fsync(struct file *filp, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(ITOZ(inode), datasync, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, datasync)); -} -#endif - -#elif defined(HAVE_FSYNC_RANGE) -/* - * Linux 3.1 API, - * As of 3.1 the responsibility to call filemap_write_and_wait_range() has - * been pushed down in to the .fsync() vfs hook. Additionally, the i_mutex - * lock is no longer held by the caller, for zfs we don't require the lock - * to be held so we don't acquire it. 
- */ static int zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { @@ -229,18 +171,6 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) return (error); } -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync)); -} -#endif - -#else -#error "Unsupported fops->fsync() implementation" -#endif - static inline int zfs_io_flags(struct kiocb *kiocb) { @@ -285,8 +215,6 @@ zpl_file_accessed(struct file *filp) } } -#if defined(HAVE_VFS_RW_ITERATE) - /* * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports * iovecs, kvevs, bvecs and pipes, plus all the required interfaces to @@ -322,14 +250,14 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) crhold(cr); cookie = spl_fstrans_mark(); - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, + ssize_t ret = -zfs_read(ITOZ(filp->f_mapping->host), &uio, filp->f_flags | zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if (error < 0) - return (error); + if (ret < 0) + return (ret); ssize_t read = count - uio.uio_resid; kiocb->ki_pos += read; @@ -343,23 +271,11 @@ static inline ssize_t zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from, size_t *countp) { -#ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB ssize_t ret = generic_write_checks(kiocb, from); if (ret <= 0) return (ret); *countp = ret; -#else - struct file *file = kiocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *ip = mapping->host; - int isblk = S_ISBLK(ip->i_mode); - - *countp = iov_iter_count(from); - ssize_t ret = generic_write_checks(file, &kiocb->ki_pos, countp, isblk); - if (ret) - return (ret); -#endif return (0); } @@ -384,169 +300,50 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) crhold(cr); cookie = spl_fstrans_mark(); - int error = -zfs_write(ITOZ(ip), &uio, - filp->f_flags | zfs_io_flags(kiocb), cr); - - 
spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - - ssize_t wrote = count - uio.uio_resid; - kiocb->ki_pos += wrote; - - return (wrote); -} - -#else /* !HAVE_VFS_RW_ITERATE */ - -static ssize_t -zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (ret) - return (ret); - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, + ret = -zfs_write(ITOZ(ip), &uio, filp->f_flags | zfs_io_flags(kiocb), cr); spl_fstrans_unmark(cookie); crfree(cr); - if (error < 0) - return (error); - - ssize_t read = count - uio.uio_resid; - kiocb->ki_pos += read; - - zpl_file_accessed(filp); - - return (read); -} - -static ssize_t -zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (ret) - return (ret); - - ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode)); - if (ret) + if (ret < 0) return (ret); - kiocb->ki_pos = pos; - - zfs_uio_t uio; - zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, - count, 0); - - crhold(cr); - cookie = spl_fstrans_mark(); - - int error = -zfs_write(ITOZ(ip), &uio, - filp->f_flags | zfs_io_flags(kiocb), cr); - - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error < 0) - return (error); - ssize_t wrote = count - uio.uio_resid; kiocb->ki_pos += wrote; return (wrote); } -#endif /* HAVE_VFS_RW_ITERATE */ -#if 
defined(HAVE_VFS_RW_ITERATE) static ssize_t -zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter) +zpl_direct_IO_impl(void) { - if (rw == WRITE) - return (zpl_iter_write(kiocb, iter)); - else - return (zpl_iter_read(kiocb, iter)); + /* + * All O_DIRECT requests should be handled by zpl_iter_read() and + * zpl_iter_write(). Generic kernel code should never call the + * direct_IO address_space_operations function, so this code path + * is made fatal if it is ever executed. + */ + PANIC(0); + return (0); } + #if defined(HAVE_VFS_DIRECT_IO_ITER) static ssize_t zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) { - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); + return (zpl_direct_IO_impl()); } #elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET) static ssize_t zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) { - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(rw, kiocb, iter)); + return (zpl_direct_IO_impl()); } #else -#error "Unknown direct IO interface" +#error "Unknown Direct I/O interface" #endif -#else /* HAVE_VFS_RW_ITERATE */ - -#if defined(HAVE_VFS_DIRECT_IO_IOVEC) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) -{ - if (rw == WRITE) - return (zpl_aio_write(kiocb, iov, nr_segs, pos)); - else - return (zpl_aio_read(kiocb, iov, nr_segs, pos)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - const struct iovec *iovp = iov_iter_iovec(iter); - unsigned long nr_segs = iter->nr_segs; - - ASSERT3S(pos, ==, kiocb->ki_pos); - if (rw == WRITE) - return (zpl_aio_write(kiocb, iovp, 
nr_segs, pos)); - else - return (zpl_aio_read(kiocb, iovp, nr_segs, pos)); -} -#else -#error "Unknown direct IO interface" -#endif - -#endif /* HAVE_VFS_RW_ITERATE */ - static loff_t zpl_llseek(struct file *filp, loff_t offset, int whence) { @@ -627,6 +424,7 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); spl_fstrans_unmark(cookie); + if (error) return (error); @@ -634,13 +432,6 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) if (error) return (error); -#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) - znode_t *zp = ITOZ(ip); - mutex_enter(&zp->z_lock); - zp->z_is_mapped = B_TRUE; - mutex_exit(&zp->z_lock); -#endif - return (error); } @@ -833,10 +624,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) fstrans_cookie_t cookie; int error = 0; - int test_mode = FALLOC_FL_PUNCH_HOLE; -#ifdef HAVE_FALLOC_FL_ZERO_RANGE - test_mode |= FALLOC_FL_ZERO_RANGE; -#endif + int test_mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE; if ((mode & ~(FALLOC_FL_KEEP_SIZE | test_mode)) != 0) return (-EOPNOTSUPP); @@ -920,7 +708,6 @@ zpl_ioctl_getversion(struct file *filp, void __user *arg) return (copy_to_user(arg, &generation, sizeof (generation))); } -#ifdef HAVE_FILE_FADVISE static int zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) { @@ -973,7 +760,6 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) return (error); } -#endif /* HAVE_FILE_FADVISE */ #define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL) #define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL) @@ -1306,20 +1092,10 @@ const struct address_space_operations zpl_address_space_operations = { #endif }; -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND -const struct file_operations_extend zpl_file_operations = { - .kabi_fops = { -#else const struct file_operations zpl_file_operations = { -#endif 
.open = zpl_open, .release = zpl_release, .llseek = zpl_llseek, -#ifdef HAVE_VFS_RW_ITERATE -#ifdef HAVE_NEW_SYNC_READ - .read = new_sync_read, - .write = new_sync_write, -#endif .read_iter = zpl_iter_read, .write_iter = zpl_iter_write, #ifdef HAVE_VFS_IOV_ITER @@ -1329,22 +1105,11 @@ const struct file_operations zpl_file_operations = { .splice_read = generic_file_splice_read, #endif .splice_write = iter_file_splice_write, -#endif -#else - .read = do_sync_read, - .write = do_sync_write, - .aio_read = zpl_aio_read, - .aio_write = zpl_aio_write, #endif .mmap = zpl_mmap, .fsync = zpl_fsync, -#ifdef HAVE_FILE_AIO_FSYNC - .aio_fsync = zpl_aio_fsync, -#endif .fallocate = zpl_fallocate, -#ifdef HAVE_VFS_COPY_FILE_RANGE .copy_file_range = zpl_copy_file_range, -#endif #ifdef HAVE_VFS_CLONE_FILE_RANGE .clone_file_range = zpl_clone_file_range, #endif @@ -1354,30 +1119,17 @@ const struct file_operations zpl_file_operations = { #ifdef HAVE_VFS_DEDUPE_FILE_RANGE .dedupe_file_range = zpl_dedupe_file_range, #endif -#ifdef HAVE_FILE_FADVISE .fadvise = zpl_fadvise, -#endif .unlocked_ioctl = zpl_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = zpl_compat_ioctl, #endif -#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND - }, /* kabi_fops */ - .copy_file_range = zpl_copy_file_range, - .clone_file_range = zpl_clone_file_range, -#endif }; const struct file_operations zpl_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, -#if defined(HAVE_VFS_ITERATE_SHARED) .iterate_shared = zpl_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_iterate, -#else - .readdir = zpl_readdir, -#endif .fsync = zpl_fsync, .unlocked_ioctl = zpl_ioctl, #ifdef CONFIG_COMPAT diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c index 64728fdb1187..d63797568ed2 100644 --- a/module/os/linux/zfs/zpl_file_range.c +++ b/module/os/linux/zfs/zpl_file_range.c @@ -83,8 +83,6 @@ zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, return ((ssize_t)len_o); } 
-#if defined(HAVE_VFS_COPY_FILE_RANGE) || \ - defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) /* * Entry point for copy_file_range(). Copy len bytes from src_off in src_file * to dst_off in dst_file. We are permitted to do this however we like, so we @@ -134,7 +132,6 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, return (ret); } -#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ #ifdef HAVE_VFS_REMAP_FILE_RANGE /* @@ -179,8 +176,7 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off, } #endif /* HAVE_VFS_REMAP_FILE_RANGE */ -#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \ - defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) +#if defined(HAVE_VFS_CLONE_FILE_RANGE) /* * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. */ @@ -201,7 +197,7 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off, return (ret); } -#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ +#endif /* HAVE_VFS_CLONE_FILE_RANGE */ #ifdef HAVE_VFS_DEDUPE_FILE_RANGE /* diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index ad1753f7a071..8386fc2ae0ce 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -238,7 +238,6 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, return (error); } -#ifdef HAVE_TMPFILE static int #ifdef HAVE_TMPFILE_IDMAP zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir, @@ -307,7 +306,6 @@ zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return (error); } -#endif static int zpl_unlink(struct inode *dir, struct dentry *dentry) @@ -591,7 +589,6 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, #if !defined(HAVE_IOPS_RENAME_USERNS) && \ !defined(HAVE_RENAME_WANTS_FLAGS) && \ - !defined(HAVE_RENAME2) && \ !defined(HAVE_IOPS_RENAME_IDMAP) static int zpl_rename(struct inode *sdip, struct dentry *sdentry, @@ -647,28 +644,11 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) return 
(error); } -#if defined(HAVE_PUT_LINK_COOKIE) -static void -zpl_put_link(struct inode *unused, void *cookie) -{ - kmem_free(cookie, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_NAMEIDATA) -static void -zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) -{ - const char *link = nd_get_link(nd); - - if (!IS_ERR(link)) - kmem_free(link, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_DELAYED) static void zpl_put_link(void *ptr) { kmem_free(ptr, MAXPATHLEN); } -#endif static int zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) @@ -700,7 +680,6 @@ zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) return (error); } -#if defined(HAVE_GET_LINK_DELAYED) static const char * zpl_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) @@ -719,51 +698,6 @@ zpl_get_link(struct dentry *dentry, struct inode *inode, return (link); } -#elif defined(HAVE_GET_LINK_COOKIE) -static const char * -zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie) -{ - char *link = NULL; - int error; - - if (!dentry) - return (ERR_PTR(-ECHILD)); - - error = zpl_get_link_common(dentry, inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_COOKIE) -static const char * -zpl_follow_link(struct dentry *dentry, void **cookie) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA) -static void * -zpl_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - nd_set_link(nd, ERR_PTR(error)); - else - nd_set_link(nd, link); - - return (NULL); -} -#endif static int zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -800,16 +734,9 @@ 
zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else @@ -818,12 +745,7 @@ const struct inode_operations zpl_inode_operations = { #endif /* CONFIG_FS_POSIX_ACL */ }; -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER -const struct inode_operations_wrapper zpl_dir_inode_operations = { - .ops = { -#else const struct inode_operations zpl_dir_inode_operations = { -#endif .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, @@ -832,77 +754,40 @@ const struct inode_operations zpl_dir_inode_operations = { .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, -#ifdef HAVE_RENAME2 - .rename2 = zpl_rename2, -#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) +#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) .rename = zpl_rename2, #elif defined(HAVE_IOPS_RENAME_IDMAP) .rename = zpl_rename2, #else .rename = zpl_rename, #endif -#ifdef HAVE_TMPFILE .tmpfile = zpl_tmpfile, -#endif .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else .get_acl = zpl_get_acl, #endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ -#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER - }, - .rename2 = zpl_rename2, -#endif }; const struct 
inode_operations zpl_symlink_inode_operations = { -#ifdef HAVE_GENERIC_READLINK - .readlink = generic_readlink, -#endif -#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE) .get_link = zpl_get_link, -#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA) - .follow_link = zpl_follow_link, -#endif -#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA) - .put_link = zpl_put_link, -#endif .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, }; const struct inode_operations zpl_special_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif .listxattr = zpl_xattr_list, #if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) .set_acl = zpl_set_acl, -#endif /* HAVE_SET_ACL */ #if defined(HAVE_GET_INODE_ACL) .get_inode_acl = zpl_get_acl, #else diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 0a82b8858eb8..287f5f36f9dd 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -29,6 +29,7 @@ #include #include #include +#include static struct inode * @@ -54,7 +55,6 @@ zpl_inode_destroy(struct inode *ip) * inode has changed. We use it to ensure the znode system attributes * are always strictly update to date with respect to the inode. 
*/ -#ifdef HAVE_DIRTY_INODE_WITH_FLAGS static void zpl_dirty_inode(struct inode *ip, int flags) { @@ -64,17 +64,6 @@ zpl_dirty_inode(struct inode *ip, int flags) zfs_dirty_inode(ip, flags); spl_fstrans_unmark(cookie); } -#else -static void -zpl_dirty_inode(struct inode *ip) -{ - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - zfs_dirty_inode(ip, 0); - spl_fstrans_unmark(cookie); -} -#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */ /* * When ->drop_inode() is called its return value indicates if the diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 4e4f5210f85d..958af83ef8fa 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -712,10 +712,6 @@ __zpl_xattr_user_get(struct inode *ip, const char *name, { int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif if (ZFS_XA_NS_PREFIX_FORBIDDEN(name)) return (-EINVAL); if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) @@ -745,10 +741,6 @@ __zpl_xattr_user_set(zidmap_t *user_ns, (void) user_ns; int error = 0; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif if (ZFS_XA_NS_PREFIX_FORBIDDEN(name)) return (-EINVAL); if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) @@ -835,10 +827,6 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name, if (!capable(CAP_SYS_ADMIN)) return (-EACCES); /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); error = zpl_xattr_get(ip, xattr_name, value, size); kmem_strfree(xattr_name); @@ -859,10 +847,6 @@ __zpl_xattr_trusted_set(zidmap_t *user_ns, if (!capable(CAP_SYS_ADMIN)) return (-EACCES); /* xattr_resolve_name will do this for us if this is defined */ -#ifndef 
HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); error = zpl_xattr_set(ip, xattr_name, value, size, flags); kmem_strfree(xattr_name); @@ -905,10 +889,6 @@ __zpl_xattr_security_get(struct inode *ip, const char *name, char *xattr_name; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); error = zpl_xattr_get(ip, xattr_name, value, size); kmem_strfree(xattr_name); @@ -926,10 +906,6 @@ __zpl_xattr_security_set(zidmap_t *user_ns, char *xattr_name; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); error = zpl_xattr_set(ip, xattr_name, value, size, flags); kmem_strfree(xattr_name); @@ -1049,15 +1025,14 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type) if (!error) { if (acl) - zpl_set_cached_acl(ip, type, acl); + set_cached_acl(ip, type, acl); else - zpl_forget_cached_acl(ip, type); + forget_cached_acl(ip, type); } return (error); } -#ifdef HAVE_SET_ACL int #ifdef HAVE_SET_ACL_USERNS zpl_set_acl(struct user_namespace *userns, struct inode *ip, @@ -1080,7 +1055,6 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type) return (zpl_set_acl_impl(ip, acl, type)); #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */ } -#endif /* HAVE_SET_ACL */ static struct posix_acl * zpl_get_acl_impl(struct inode *ip, int type) @@ -1089,17 +1063,6 @@ zpl_get_acl_impl(struct inode *ip, int type) void *value = NULL; char *name; - /* - * As of Linux 3.14, the kernel get_acl will check this for us. 
- * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong - * as the kernel get_acl will set it to temporary sentinel value. - */ -#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE - acl = get_cached_acl(ip, type); - if (acl != ACL_NOT_CACHED) - return (acl); -#endif - switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; @@ -1128,12 +1091,6 @@ zpl_get_acl_impl(struct inode *ip, int type) if (size > 0) kmem_free(value, size); - /* As of Linux 4.7, the kernel get_acl will set this for us */ -#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE - if (!IS_ERR(acl)) - zpl_set_cached_acl(ip, type, acl); -#endif - return (acl); } @@ -1270,10 +1227,6 @@ __zpl_xattr_acl_get_access(struct inode *ip, const char *name, int type = ACL_TYPE_ACCESS; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1298,10 +1251,6 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name, int type = ACL_TYPE_DEFAULT; int error; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1327,10 +1276,6 @@ __zpl_xattr_acl_set_access(zidmap_t *mnt_ns, int type = ACL_TYPE_ACCESS; int error = 0; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1348,7 +1293,7 @@ __zpl_xattr_acl_set_access(zidmap_t *mnt_ns, if (IS_ERR(acl)) return (PTR_ERR(acl)); else if (acl) { - error = zpl_posix_acl_valid(ip, acl); + error = posix_acl_valid(ip->i_sb->s_user_ns, acl); if (error) { zpl_posix_acl_release(acl); return (error); @@ -1373,10 +1318,6 @@ 
__zpl_xattr_acl_set_default(zidmap_t *mnt_ns, int type = ACL_TYPE_DEFAULT; int error = 0; /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") != 0) - return (-EINVAL); -#endif if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX) return (-EOPNOTSUPP); @@ -1394,7 +1335,7 @@ __zpl_xattr_acl_set_default(zidmap_t *mnt_ns, if (IS_ERR(acl)) return (PTR_ERR(acl)); else if (acl) { - error = zpl_posix_acl_valid(ip, acl); + error = posix_acl_valid(ip->i_sb->s_user_ns, acl); if (error) { zpl_posix_acl_release(acl); return (error); @@ -1418,41 +1359,25 @@ ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default); * whole name and reject anything that has .name only as prefix. */ static xattr_handler_t zpl_xattr_acl_access_handler = { -#ifdef HAVE_XATTR_HANDLER_NAME .name = XATTR_NAME_POSIX_ACL_ACCESS, -#else - .prefix = XATTR_NAME_POSIX_ACL_ACCESS, -#endif .list = zpl_xattr_acl_list_access, .get = zpl_xattr_acl_get_access, .set = zpl_xattr_acl_set_access, -#if defined(HAVE_XATTR_LIST_SIMPLE) || \ - defined(HAVE_XATTR_LIST_DENTRY) || \ - defined(HAVE_XATTR_LIST_HANDLER) .flags = ACL_TYPE_ACCESS, -#endif }; /* * ACL default xattr namespace handlers. * - * Use .name instead of .prefix when available. xattr_resolve_name will match - * whole name and reject anything that has .name only as prefix. + * Use .name instead of .prefix. xattr_resolve_name will match whole name and + * reject anything that has .name only as prefix. 
*/ static xattr_handler_t zpl_xattr_acl_default_handler = { -#ifdef HAVE_XATTR_HANDLER_NAME .name = XATTR_NAME_POSIX_ACL_DEFAULT, -#else - .prefix = XATTR_NAME_POSIX_ACL_DEFAULT, -#endif .list = zpl_xattr_acl_list_default, .get = zpl_xattr_acl_get_default, .set = zpl_xattr_acl_set_default, -#if defined(HAVE_XATTR_LIST_SIMPLE) || \ - defined(HAVE_XATTR_LIST_DENTRY) || \ - defined(HAVE_XATTR_LIST_HANDLER) .flags = ACL_TYPE_DEFAULT, -#endif }; #endif /* CONFIG_FS_POSIX_ACL */ @@ -1517,24 +1442,15 @@ zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len) } if (handler->list) { -#if defined(HAVE_XATTR_LIST_SIMPLE) if (!handler->list(d)) return (XAPERM_DENY); -#elif defined(HAVE_XATTR_LIST_DENTRY) - if (!handler->list(d, NULL, 0, name, name_len, 0)) - return (XAPERM_DENY); -#elif defined(HAVE_XATTR_LIST_HANDLER) - if (!handler->list(handler, d, NULL, 0, name, name_len)) - return (XAPERM_DENY); -#endif } return (perm); } -#if defined(CONFIG_FS_POSIX_ACL) && \ - (!defined(HAVE_POSIX_ACL_RELEASE) || \ - defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)) +#ifdef CONFIG_FS_POSIX_ACL + struct acl_rel_struct { struct acl_rel_struct *next; struct posix_acl *acl; diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index d1e3061b50e6..303aafc95dd7 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -44,10 +44,7 @@ #include #include #include - -#ifdef HAVE_BLK_MQ #include -#endif static void zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, boolean_t force_sync); @@ -68,7 +65,6 @@ static unsigned int zvol_open_timeout_ms = 1000; #endif static unsigned int zvol_threads = 0; -#ifdef HAVE_BLK_MQ static unsigned int zvol_blk_mq_threads = 0; static unsigned int zvol_blk_mq_actual_threads; static boolean_t zvol_use_blk_mq = B_FALSE; @@ -84,7 +80,6 @@ static boolean_t zvol_use_blk_mq = B_FALSE; * read and write tests to a zvol in an NVMe pool (with 16 CPUs). 
*/ static unsigned int zvol_blk_mq_blocks_per_thread = 8; -#endif static unsigned int zvol_num_taskqs = 0; @@ -96,31 +91,26 @@ static unsigned int zvol_num_taskqs = 0; /* * Finalize our BIO or request. */ -#ifdef HAVE_BLK_MQ -#define END_IO(zv, bio, rq, error) do { \ - if (bio) { \ - BIO_END_IO(bio, error); \ - } else { \ - blk_mq_end_request(rq, errno_to_bi_status(error)); \ - } \ -} while (0) -#else -#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error) -#endif +static inline void +zvol_end_io(struct bio *bio, struct request *rq, int error) +{ + if (bio) { + bio->bi_status = errno_to_bi_status(-error); + bio_endio(bio); + } else { + blk_mq_end_request(rq, errno_to_bi_status(error)); + } +} -#ifdef HAVE_BLK_MQ static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; static unsigned int zvol_actual_blk_mq_queue_depth; -#endif struct zvol_state_os { struct gendisk *zvo_disk; /* generic disk */ struct request_queue *zvo_queue; /* request queue */ dev_t zvo_dev; /* device id */ -#ifdef HAVE_BLK_MQ struct blk_mq_tag_set tag_set; -#endif /* Set from the global 'zvol_use_blk_mq' at zvol load */ boolean_t use_blk_mq; @@ -165,8 +155,6 @@ zv_request_task_free(zv_request_task_t *task) kmem_free(task, sizeof (*task)); } -#ifdef HAVE_BLK_MQ - /* * This is called when a new block multiqueue request comes in. A request * contains one or more BIOs. @@ -219,7 +207,6 @@ static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv) return (blk_mq_alloc_tag_set(&zso->tag_set)); } -#endif /* HAVE_BLK_MQ */ /* * Given a path, return TRUE if path is a ZVOL. @@ -265,7 +252,7 @@ zvol_write(zv_request_t *zvr) /* Some requests are just for flush and nothing else. 
*/ if (io_size(bio, rq) == 0) { rw_exit(&zv->zv_suspend_lock); - END_IO(zv, bio, rq, 0); + zvol_end_io(bio, rq, 0); return; } @@ -332,7 +319,7 @@ zvol_write(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -421,7 +408,7 @@ zvol_discard(zv_request_t *zvr) start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -498,7 +485,7 @@ zvol_read(zv_request_t *zvr) blk_generic_end_io_acct(q, disk, READ, bio, start_time); } - END_IO(zv, bio, rq, -error); + zvol_end_io(bio, rq, -error); } static void @@ -529,7 +516,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, int rw = io_data_dir(bio, rq); if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { - END_IO(zv, bio, rq, -SET_ERROR(ENXIO)); + zvol_end_io(bio, rq, -SET_ERROR(ENXIO)); goto out; } @@ -548,7 +535,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, (long long unsigned)offset, (long unsigned)size); - END_IO(zv, bio, rq, -SET_ERROR(EIO)); + zvol_end_io(bio, rq, -SET_ERROR(EIO)); goto out; } @@ -557,14 +544,12 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint_t blk_mq_hw_queue = 0; uint_t tq_idx; uint_t taskq_hash; -#ifdef HAVE_BLK_MQ if (rq) #ifdef HAVE_BLK_MQ_RQ_HCTX blk_mq_hw_queue = rq->mq_hctx->queue_num; #else blk_mq_hw_queue = rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num; -#endif #endif taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT, blk_mq_hw_queue, 0); @@ -572,7 +557,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, if (rw == WRITE) { if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - END_IO(zv, bio, rq, -SET_ERROR(EROFS)); + zvol_end_io(bio, rq, -SET_ERROR(EROFS)); goto out; } @@ -657,7 +642,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, * data and require no additional handling. 
*/ if (size == 0) { - END_IO(zv, bio, rq, 0); + zvol_end_io(bio, rq, 0); goto out; } @@ -1171,7 +1156,6 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv, * the correct number of segments for the volblocksize and * number of chunks you want. */ -#ifdef HAVE_BLK_MQ if (zvol_blk_mq_blocks_per_thread != 0) { unsigned int chunks; chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX); @@ -1188,7 +1172,6 @@ zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv, limits->zql_max_segment_size = UINT_MAX; } } else { -#endif limits->zql_max_segments = UINT16_MAX; limits->zql_max_segment_size = UINT_MAX; } @@ -1301,7 +1284,6 @@ zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits) static int zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) { -#ifdef HAVE_BLK_MQ struct zvol_state_os *zso = zv->zv_zso; /* Allocate our blk-mq tag_set */ @@ -1348,7 +1330,6 @@ zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits) #endif zvol_queue_limits_apply(limits, zso->zvo_queue); -#endif return (0); } @@ -1384,9 +1365,7 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL); -#ifdef HAVE_BLK_MQ zv->zv_zso->use_blk_mq = zvol_use_blk_mq; -#endif zvol_queue_limits_t limits; zvol_queue_limits_init(&limits, zv, zv->zv_zso->use_blk_mq); @@ -1442,8 +1421,8 @@ zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize) */ if (volmode == ZFS_VOLMODE_DEV) { zso->zvo_disk->minors = 1; - zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT; - zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART; + zso->zvo_disk->flags &= ~GENHD_FL_EXT_DEVT; + zso->zvo_disk->flags |= GENHD_FL_NO_PART; } zso->zvo_disk->first_minor = (dev & MINORMASK); @@ -1495,10 +1474,8 @@ zvol_os_free(zvol_state_t *zv) put_disk(zv->zv_zso->zvo_disk); #endif -#ifdef HAVE_BLK_MQ if (zv->zv_zso->use_blk_mq) 
blk_mq_free_tag_set(&zv->zv_zso->tag_set); -#endif ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); @@ -1863,7 +1840,6 @@ zvol_init(void) return (error); } -#ifdef HAVE_BLK_MQ if (zvol_blk_mq_queue_depth == 0) { zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; } else { @@ -1877,7 +1853,7 @@ zvol_init(void) zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1), 1024); } -#endif + for (uint_t i = 0; i < num_tqs; i++) { char name[32]; (void) snprintf(name, sizeof (name), "%s_tq-%u", @@ -1949,7 +1925,6 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); module_param(zvol_volmode, uint, 0644); MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); -#ifdef HAVE_BLK_MQ module_param(zvol_blk_mq_queue_depth, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); @@ -1959,7 +1934,6 @@ MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, "Process volblocksize blocks per thread"); -#endif #ifndef HAVE_BLKDEV_GET_ERESTARTSYS module_param(zvol_open_timeout_ms, uint, 0644); diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 764993b45e7c..10ac13a898ce 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -395,6 +395,13 @@ zfs_prop_init(void) { NULL } }; + static const zprop_index_t direct_table[] = { + { "disabled", ZFS_DIRECT_DISABLED }, + { "standard", ZFS_DIRECT_STANDARD }, + { "always", ZFS_DIRECT_ALWAYS }, + { NULL } + }; + struct zfs_mod_supported_features *sfeatures = zfs_mod_list_supported(ZFS_SYSFS_DATASET_PROPERTIES); @@ -479,6 +486,10 @@ zfs_prop_init(void) ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "default | full | geom | dev | none", "VOLMODE", volmode_table, sfeatures); + zprop_register_index(ZFS_PROP_DIRECT, "direct", + ZFS_DIRECT_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, + "disabled | standard | 
always", "DIRECT", direct_table, + sfeatures); /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c index e2d4d1aefefb..622323bbbd5f 100644 --- a/module/zcommon/zfs_valstr.c +++ b/module/zcommon/zfs_valstr.c @@ -218,6 +218,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "NP", "NOPWRITE" }, { '.', "EX", "REEXECUTED" }, { '.', "DG", "DELEGATED" }, + { '.', "DC", "DIO_CHKSUM_ERR" }, ) /* END CSTYLED */ @@ -252,6 +253,7 @@ _VALSTR_BITFIELD_IMPL(zio_stage, { 'V', "VD", "VDEV_IO_DONE" }, { 'V', "VA", "VDEV_IO_ASSESS" }, { 'C', "CV", "CHECKSUM_VERIFY" }, + { 'C', "DC", "DIO_CHECKSUM_VERIFY" }, { 'X', "X ", "DONE" }, ) /* END CSTYLED */ diff --git a/module/zfs/abd.c b/module/zfs/abd.c index c8c4d2270fae..529deeecfd4b 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -89,8 +89,8 @@ * functions. * * As an additional feature, linear and scatter ABD's can be stitched together - * by using the gang ABD type (abd_alloc_gang_abd()). This allows for - * multiple ABDs to be viewed as a singular ABD. + * by using the gang ABD type (abd_alloc_gang()). This allows for multiple ABDs + * to be viewed as a singular ABD. * * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to * B_FALSE. 
@@ -109,11 +109,15 @@ void abd_verify(abd_t *abd) { #ifdef ZFS_DEBUG - ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); + if (abd_is_from_pages(abd)) { + ASSERT3U(abd->abd_size, <=, DMU_MAX_ACCESS); + } else { + ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); + } ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG | - ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD)); + ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD | ABD_FLAG_FROM_PAGES)); IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); if (abd_is_linear(abd)) { @@ -136,7 +140,7 @@ abd_verify(abd_t *abd) #endif } -static void +void abd_init_struct(abd_t *abd) { list_link_init(&abd->abd_gang_link); @@ -238,6 +242,7 @@ abd_free_linear(abd_t *abd) abd_free_linear_page(abd); return; } + if (abd->abd_flags & ABD_FLAG_META) { zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); } else { @@ -520,6 +525,21 @@ abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size) */ abd->abd_flags |= ABD_FLAG_LINEAR; + /* + * User pages from Direct I/O requests may be in a single page + * (ABD_FLAG_LINEAR_PAGE), and we must make sure to still flag + * that here for abd. This is required because we have to be + * careful when borrowing the buffer from the ABD because we + * can not place user pages under write protection on Linux. + * See the comments in abd_os.c for abd_borrow_buf(), + * abd_borrow_buf_copy(), abd_return_buf() and + * abd_return_buf_copy(). 
+ */ + if (abd_is_from_pages(sabd)) { + abd->abd_flags |= ABD_FLAG_FROM_PAGES | + ABD_FLAG_LINEAR_PAGE; + } + ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off; } else if (abd_is_gang(sabd)) { size_t left = size; @@ -648,70 +668,6 @@ abd_to_buf(abd_t *abd) return (ABD_LINEAR_BUF(abd)); } -/* - * Borrow a raw buffer from an ABD without copying the contents of the ABD - * into the buffer. If the ABD is scattered, this will allocate a raw buffer - * whose contents are undefined. To copy over the existing data in the ABD, use - * abd_borrow_buf_copy() instead. - */ -void * -abd_borrow_buf(abd_t *abd, size_t n) -{ - void *buf; - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - buf = abd_to_buf(abd); - } else { - buf = zio_buf_alloc(n); - } -#ifdef ZFS_DEBUG - (void) zfs_refcount_add_many(&abd->abd_children, n, buf); -#endif - return (buf); -} - -void * -abd_borrow_buf_copy(abd_t *abd, size_t n) -{ - void *buf = abd_borrow_buf(abd, n); - if (!abd_is_linear(abd)) { - abd_copy_to_buf(buf, abd, n); - } - return (buf); -} - -/* - * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * not change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. 
- */ -void -abd_return_buf(abd_t *abd, void *buf, size_t n) -{ - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); -#ifdef ZFS_DEBUG - (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); -#endif - if (abd_is_linear(abd)) { - ASSERT3P(buf, ==, abd_to_buf(abd)); - } else { - ASSERT0(abd_cmp_buf(abd, buf, n)); - zio_buf_free(buf, n); - } -} - -void -abd_return_buf_copy(abd_t *abd, void *buf, size_t n) -{ - if (!abd_is_linear(abd)) { - abd_copy_from_buf(abd, buf, n); - } - abd_return_buf(abd, buf, n); -} - void abd_release_ownership_of_buf(abd_t *abd) { diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 714a30e863a7..b5bcd367b247 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5961,7 +5961,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); - zfs_racct_read(size, 1); + zfs_racct_read(spa, size, 1, 0); } /* Check if the spa even has l2 configured */ diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c index 914260e742f9..27a04c2af06c 100644 --- a/module/zfs/dataset_kstats.c +++ b/module/zfs/dataset_kstats.c @@ -217,8 +217,7 @@ dataset_kstats_rename(dataset_kstats_t *dk, const char *name) } void -dataset_kstats_update_write_kstats(dataset_kstats_t *dk, - int64_t nwritten) +dataset_kstats_update_write_kstats(dataset_kstats_t *dk, int64_t nwritten) { ASSERT3S(nwritten, >=, 0); @@ -230,8 +229,7 @@ dataset_kstats_update_write_kstats(dataset_kstats_t *dk, } void -dataset_kstats_update_read_kstats(dataset_kstats_t *dk, - int64_t nread) +dataset_kstats_update_read_kstats(dataset_kstats_t *dk, int64_t nread) { ASSERT3S(nread, >=, 0); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 099883ba2652..df9368fc8bdb 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -628,7 +628,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db) * L2ARC. 
*/ boolean_t -dbuf_is_l2cacheable(dmu_buf_impl_t *db) +dbuf_is_l2cacheable(dmu_buf_impl_t *db, blkptr_t *bp) { if (db->db_objset->os_secondary_cache == ZFS_CACHE_ALL || (db->db_objset->os_secondary_cache == @@ -636,10 +636,17 @@ dbuf_is_l2cacheable(dmu_buf_impl_t *db) if (l2arc_exclude_special == 0) return (B_TRUE); - blkptr_t *bp = db->db_blkptr; - if (bp == NULL || BP_IS_HOLE(bp)) + /* + * bp must be checked in the event it was passed from + * dbuf_read_impl() as the result of the BP being set from + * a Direct I/O write in dbuf_read(). See comments in + * dbuf_read(). + */ + blkptr_t *db_bp = bp == NULL ? db->db_blkptr : bp; + + if (db_bp == NULL || BP_IS_HOLE(db_bp)) return (B_FALSE); - uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); + uint64_t vdev = DVA_GET_VDEV(db_bp->blk_dva); vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev; vdev_t *vd = NULL; @@ -1380,6 +1387,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, mutex_enter(&db->db_mtx); ASSERT3U(db->db_state, ==, DB_READ); + /* * All reads are synchronous, so we must have a hold on the dbuf */ @@ -1570,12 +1578,11 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, dnode_t *dn, uint32_t flags) */ static int dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, - db_lock_type_t dblt, const void *tag) + db_lock_type_t dblt, blkptr_t *bp, const void *tag) { zbookmark_phys_t zb; uint32_t aflags = ARC_FLAG_NOWAIT; int err, zio_flags; - blkptr_t bp, *bpp = NULL; ASSERT(!zfs_refcount_is_zero(&db->db_holds)); ASSERT(MUTEX_HELD(&db->db_mtx)); @@ -1589,43 +1596,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, goto early_unlock; } - /* - * If we have a pending block clone, we don't want to read the - * underlying block, but the content of the block being cloned, - * pointed by the dirty record, so we have the most recent data. 
- * If there is no dirty record, then we hit a race in a sync - * process when the dirty record is already removed, while the - * dbuf is not yet destroyed. Such case is equivalent to uncached. - */ - if (db->db_state == DB_NOFILL) { - dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); - if (dr != NULL) { - if (!dr->dt.dl.dr_brtwrite) { - err = EIO; - goto early_unlock; - } - bp = dr->dt.dl.dr_overridden_by; - bpp = &bp; - } - } - - if (bpp == NULL && db->db_blkptr != NULL) { - bp = *db->db_blkptr; - bpp = &bp; - } - - err = dbuf_read_hole(db, dn, bpp); + err = dbuf_read_hole(db, dn, bp); if (err == 0) goto early_unlock; - ASSERT(bpp != NULL); + ASSERT(bp != NULL); /* * Any attempt to read a redacted block should result in an error. This * will never happen under normal conditions, but can be useful for * debugging purposes. */ - if (BP_IS_REDACTED(bpp)) { + if (BP_IS_REDACTED(bp)) { ASSERT(dsl_dataset_feature_is_active( db->db_objset->os_dsl_dataset, SPA_FEATURE_REDACTED_DATASETS)); @@ -1640,9 +1622,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, * All bps of an encrypted os should have the encryption bit set. * If this is not true it indicates tampering and we report an error. */ - if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bp)) { spa_log_error(db->db_objset->os_spa, &zb, - BP_GET_LOGICAL_BIRTH(bpp)); + BP_GET_LOGICAL_BIRTH(bp)); err = SET_ERROR(EIO); goto early_unlock; } @@ -1653,7 +1635,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, if (!DBUF_IS_CACHEABLE(db)) aflags |= ARC_FLAG_UNCACHED; - else if (dbuf_is_l2cacheable(db)) + else if (dbuf_is_l2cacheable(db, bp)) aflags |= ARC_FLAG_L2CACHE; dbuf_add_ref(db, NULL); @@ -1661,17 +1643,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, dnode_t *dn, zio_t *zio, uint32_t flags, zio_flags = (flags & DB_RF_CANFAIL) ? 
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; - if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) + if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(bp)) zio_flags |= ZIO_FLAG_RAW; + /* - * The zio layer will copy the provided blkptr later, but we have our - * own copy so that we can release the parent's rwlock. We have to - * do that so that if dbuf_read_done is called synchronously (on + * The zio layer will copy the provided blkptr later, but we need to + * do this now so that we can release the parent's rwlock. We have to + * do that now so that if dbuf_read_done is called synchronously (on * an l1 cache hit) we don't acquire the db_mtx while holding the * parent's rwlock, which would be a lock ordering violation. */ + blkptr_t copy = *bp; dmu_buf_unlock_parent(db, dblt, tag); - return (arc_read(zio, db->db_objset->os_spa, bpp, + return (arc_read(zio, db->db_objset->os_spa, &copy, dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb)); @@ -1844,13 +1828,30 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *pio, uint32_t flags) ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); db_lock_type_t dblt = dmu_buf_lock_parent(db, RW_READER, FTAG); - if (pio == NULL && (db->db_state == DB_NOFILL || - (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)))) { - spa_t *spa = dn->dn_objset->os_spa; - pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); - need_wait = B_TRUE; + blkptr_t *bp; + + /* + * If a block clone or Direct I/O write has occurred we will + * get the dirty record's overridden BP so we get the most + * recent data. 
+ */ + err = dmu_buf_get_bp_from_dbuf(db, &bp); + + if (!err) { + if (pio == NULL && (db->db_state == DB_NOFILL || + (bp != NULL && !BP_IS_HOLE(bp)))) { + spa_t *spa = dn->dn_objset->os_spa; + pio = + zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + need_wait = B_TRUE; + } + + err = + dbuf_read_impl(db, dn, pio, flags, dblt, bp, FTAG); + } else { + mutex_exit(&db->db_mtx); + dmu_buf_unlock_parent(db, dblt, FTAG); } - err = dbuf_read_impl(db, dn, pio, flags, dblt, FTAG); /* dbuf_read_impl drops db_mtx and parent's rwlock. */ miss = (db->db_state != DB_CACHED); } @@ -1918,6 +1919,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) uint64_t txg = dr->dr_txg; ASSERT(MUTEX_HELD(&db->db_mtx)); + /* * This assert is valid because dmu_sync() expects to be called by * a zilog's get_data while holding a range lock. This call only @@ -1936,16 +1938,20 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - if (dr->dt.dl.dr_brtwrite) { + if (dr->dt.dl.dr_brtwrite || dr->dt.dl.dr_diowrite) { ASSERT0P(dr->dt.dl.dr_data); dr->dt.dl.dr_data = db->db_buf; } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; + dr->dt.dl.dr_diowrite = B_FALSE; dr->dt.dl.dr_has_raw_params = B_FALSE; /* + * In the event that Direct I/O was used, we do not + * need to release the buffer from the ARC. + * * Release the already-written buffer, so we leave it in * a consistent dirty state. 
Note that all callers are * modifying the buffer, so they will immediately do @@ -2084,6 +2090,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) */ dmu_buf_will_dirty(&db->db, tx); + VERIFY3P(db->db_buf, !=, NULL); + /* create the data buffer for the new block */ buf = arc_alloc_buf(dn->dn_objset->os_spa, db, type, size); @@ -2532,6 +2540,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { uint64_t txg = tx->tx_txg; boolean_t brtwrite; + boolean_t diowrite; ASSERT(txg != 0); @@ -2557,7 +2566,9 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ASSERT(dr->dr_dbuf == db); brtwrite = dr->dt.dl.dr_brtwrite; + diowrite = dr->dt.dl.dr_diowrite; if (brtwrite) { + ASSERT3B(diowrite, ==, B_FALSE); /* * We are freeing a block that we cloned in the same * transaction group. @@ -2598,10 +2609,11 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) if (db->db_state != DB_NOFILL && !brtwrite) { dbuf_unoverride(dr); - ASSERT(db->db_buf != NULL); - ASSERT(dr->dt.dl.dr_data != NULL); - if (dr->dt.dl.dr_data != db->db_buf) + if (dr->dt.dl.dr_data != db->db_buf) { + ASSERT(db->db_buf != NULL); + ASSERT(dr->dt.dl.dr_data != NULL); arc_buf_destroy(dr->dt.dl.dr_data, db); + } } kmem_free(dr, sizeof (dbuf_dirty_record_t)); @@ -2610,7 +2622,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) db->db_dirtycnt -= 1; if (zfs_refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { - ASSERT(db->db_state == DB_NOFILL || brtwrite || + ASSERT(db->db_state == DB_NOFILL || brtwrite || diowrite || arc_released(db->db_buf)); dbuf_destroy(db); return (B_TRUE); @@ -2670,8 +2682,7 @@ dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) * Block cloning: Do the dbuf_read() before undirtying the dbuf, as we * want to make sure dbuf_read() will read the pending cloned block and * not the uderlying block that is being replaced. dbuf_undirty() will - * do dbuf_unoverride(), so we will end up with cloned block content, - * without overridden BP. 
+ * do brt_pending_remove() before removing the dirty record. */ (void) dbuf_read(db, NULL, flags); if (undirty) { @@ -2701,23 +2712,126 @@ dmu_buf_is_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) return (dr != NULL); } +/* + * Normally the db_blkptr points to the most recent on-disk content for the + * dbuf (and anything newer will be cached in the dbuf). However, a pending + * block clone or not yet synced Direct I/O write will have a dirty record BP + * pointing to the most recent data. + */ +int +dmu_buf_get_bp_from_dbuf(dmu_buf_impl_t *db, blkptr_t **bp) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + int error = 0; + + if (db->db_level != 0) { + *bp = db->db_blkptr; + return (0); + } + + *bp = db->db_blkptr; + dbuf_dirty_record_t *dr = list_head(&db->db_dirty_records); + if (dr && db->db_state == DB_NOFILL) { + /* Block clone */ + if (!dr->dt.dl.dr_brtwrite) + error = EIO; + else + *bp = &dr->dt.dl.dr_overridden_by; + } else if (dr && db->db_state == DB_UNCACHED) { + /* Direct I/O write */ + if (dr->dt.dl.dr_diowrite) + *bp = &dr->dt.dl.dr_overridden_by; + } + + return (error); +} + +/* + * Direct I/O reads can read directly from the ARC, but the data has + * to be untransformed in order to copy it over into user pages. + */ +int +dmu_buf_untransform_direct(dmu_buf_impl_t *db, spa_t *spa) +{ + int err = 0; + DB_DNODE_ENTER(db); + dnode_t *dn = DB_DNODE(db); + + ASSERT3S(db->db_state, ==, DB_CACHED); + ASSERT(MUTEX_HELD(&db->db_mtx)); + + /* + * Ensure that this block's dnode has been decrypted if + * the caller has requested decrypted data. + */ + err = dbuf_read_verify_dnode_crypt(db, dn, 0); + + /* + * If the arc buf is compressed or encrypted and the caller + * requested uncompressed data, we need to untransform it + * before returning. We also call arc_untransform() on any + * unauthenticated blocks, which will verify their MAC if + * the key is now available. 
+ */ + if (err == 0 && db->db_buf != NULL && + (arc_is_encrypted(db->db_buf) || + arc_is_unauthenticated(db->db_buf) || + arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { + zbookmark_phys_t zb; + + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + dbuf_fix_old_data(db, spa_syncing_txg(spa)); + err = arc_untransform(db->db_buf, spa, &zb, B_FALSE); + dbuf_set_data(db, db->db_buf); + } + DB_DNODE_EXIT(db); + DBUF_STAT_BUMP(hash_hits); + + return (err); +} + void -dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) +dmu_buf_will_clone_or_dio(dmu_buf_t *db_fake, dmu_tx_t *tx) { + /* + * Block clones and Direct I/O writes always happen in open-context. + */ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ASSERT0(db->db_level); + ASSERT(!dmu_tx_is_syncing(tx)); + ASSERT0(db->db_level); ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); - /* - * Block cloning: We are going to clone into this block, so undirty - * modifications done to this block so far in this txg. This includes - * writes and clones into this block. - */ mutex_enter(&db->db_mtx); DBUF_VERIFY(db); - VERIFY(!dbuf_undirty(db, tx)); + + /* + * We are going to clone or issue a Direct I/O write on this block, so + * undirty modifications done to this block so far in this txg. This + * includes writes and clones into this block. + * + * If there is a dirty record associated with this txg from a previous Direct + * I/O write then space accounting cleanup takes place. It is important + * to go ahead and free up the space accounting through dbuf_undirty() -> + * dbuf_unoverride() -> zio_free(). Space accounting for determining + * if a write can occur in zfs_write() happens through dmu_tx_assign(). + * This can cause an issue with Direct I/O writes in the case of + * overwriting the same block, because all DVA allocations are being + * done in open-context. 
Constantly allowing Direct I/O overwrites to + * the same block can exhaust the pool's available space leading to + * ENOSPC errors at the DVA allocation part of the ZIO pipeline, which + * will eventually suspend the pool. By cleaning up space accounting + * now, the ENOSPC error can be avoided. + * + * Since we are undirtying the record in open-context, we must have a + * hold on the db, so it should never be evicted after calling + * dbuf_undirty(). + */ + VERIFY3B(dbuf_undirty(db, tx), ==, B_FALSE); ASSERT0P(dbuf_find_dirty_eq(db, tx->tx_txg)); + if (db->db_buf != NULL) { /* * If there is an associated ARC buffer with this dbuf we can @@ -2728,6 +2842,11 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) arc_buf_destroy(db->db_buf, db); + /* + * Setting the dbuf's data pointers to NULL will force all + * future reads down to the devices to get the most up to date + * version of the data after a Direct I/O write has completed. + */ db->db_buf = NULL; dbuf_clear_data(db); } @@ -2736,7 +2855,8 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx) ASSERT3P(db->db.db_data, ==, NULL); db->db_state = DB_NOFILL; - DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone"); + DTRACE_SET_STATE(db, + "allocating NOFILL buffer for clone or direct I/O write"); DBUF_VERIFY(db); mutex_exit(&db->db_mtx); @@ -2773,21 +2893,28 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail) dmu_tx_private_ok(tx)); mutex_enter(&db->db_mtx); - if (db->db_state == DB_NOFILL) { + dbuf_dirty_record_t *dr = dbuf_find_dirty_eq(db, tx->tx_txg); + if (db->db_state == DB_NOFILL || + (db->db_state == DB_UNCACHED && dr && dr->dt.dl.dr_diowrite)) { /* - * Block cloning: We will be completely overwriting a block - * cloned in this transaction group, so let's undirty the - * pending clone and mark the block as uncached. This will be - * as if the clone was never done. 
But if the fill can fail - * we should have a way to return back to the cloned data. + * If the fill can fail we should have a way to return back to + * the cloned or Direct I/O write data. */ - if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) { + if (canfail && dr) { mutex_exit(&db->db_mtx); dmu_buf_will_dirty(db_fake, tx); return; } - VERIFY(!dbuf_undirty(db, tx)); - db->db_state = DB_UNCACHED; + /* + * Block cloning: We will be completely overwriting a block + * cloned in this transaction group, so let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + if (dr && dr->dt.dl.dr_brtwrite) { + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; + } } mutex_exit(&db->db_mtx); @@ -4080,7 +4207,6 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting) } else { mutex_exit(&db->db_mtx); } - } #pragma weak dmu_buf_refcount = dbuf_refcount @@ -4540,24 +4666,32 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) mutex_enter(&db->db_mtx); /* - * To be synced, we must be dirtied. But we - * might have been freed after the dirty. + * To be synced, we must be dirtied. But we might have been freed + * after the dirty. */ if (db->db_state == DB_UNCACHED) { /* This buffer has been freed since it was dirtied */ - ASSERT(db->db.db_data == NULL); + ASSERT3P(db->db.db_data, ==, NULL); } else if (db->db_state == DB_FILL) { /* This buffer was freed and is now being re-filled */ ASSERT(db->db.db_data != dr->dt.dl.dr_data); } else if (db->db_state == DB_READ) { /* - * This buffer has a clone we need to write, and an in-flight - * read on the BP we're about to clone. Its safe to issue the - * write here because the read has already been issued and the - * contents won't change. + * This buffer was either cloned or had a Direct I/O write + * occur and has an in-flight read on the BP. 
It is safe to + * issue the write here, because the read has already been + * issued and the contents won't change. + * + * We can verify the case of both the clone and Direct I/O + * write by making sure the first dirty record for the dbuf + * has no ARC buffer associated with it. */ - ASSERT(dr->dt.dl.dr_brtwrite && - dr->dt.dl.dr_override_state == DR_OVERRIDDEN); + dbuf_dirty_record_t *dr_head = + list_head(&db->db_dirty_records); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + ASSERT3P(dr_head->dt.dl.dr_data, ==, NULL); + ASSERT3U(dr_head->dt.dl.dr_override_state, ==, DR_OVERRIDDEN); } else { ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); } @@ -4608,8 +4742,12 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dbuf_check_blkptr(dn, db); /* - * If this buffer is in the middle of an immediate write, - * wait for the synchronous IO to complete. + * If this buffer is in the middle of an immediate write, wait for the + * synchronous IO to complete. + * + * This is also valid even with Direct I/O writes setting a dirty + * records override state into DR_IN_DMU_SYNC, because all + * Direct I/O writes happen in open-context. 
*/ while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); @@ -4913,8 +5051,12 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_level == 0) { ASSERT(db->db_blkid != DMU_BONUS_BLKID); ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); + + /* no dr_data if this is a NO_FILL or Direct I/O */ if (dr->dt.dl.dr_data != NULL && dr->dt.dl.dr_data != db->db_buf) { + ASSERT3B(dr->dt.dl.dr_brtwrite, ==, B_FALSE); + ASSERT3B(dr->dt.dl.dr_diowrite, ==, B_FALSE); arc_buf_destroy(dr->dt.dl.dr_data, db); } } else { @@ -5180,7 +5322,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { /* * The BP for this block has been provided by open context - * (by dmu_sync() or dmu_buf_write_embedded()). + * (by dmu_sync(), dmu_write_direct(), + * or dmu_buf_write_embedded()). */ abd_t *contents = (data != NULL) ? abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL; @@ -5219,7 +5362,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) dr->dr_zio = arc_write(pio, os->os_spa, txg, &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db), - dbuf_is_l2cacheable(db), &zp, dbuf_write_ready, + dbuf_is_l2cacheable(db, NULL), &zp, dbuf_write_ready, children_ready_cb, dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); } @@ -5239,7 +5382,7 @@ EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dmu_buf_set_crypt_params); EXPORT_SYMBOL(dmu_buf_will_dirty); EXPORT_SYMBOL(dmu_buf_is_dirty); -EXPORT_SYMBOL(dmu_buf_will_clone); +EXPORT_SYMBOL(dmu_buf_will_clone_or_dio); EXPORT_SYMBOL(dmu_buf_will_not_fill); EXPORT_SYMBOL(dmu_buf_will_fill); EXPORT_SYMBOL(dmu_buf_fill_done); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b3eda8ea5097..3f87cfe6bee9 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -609,8 +609,16 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dbp[i] = &db->db; } - if (!read) - 
zfs_racct_write(length, nblks); + /* + * If we are doing O_DIRECT we still hold the dbufs, even for reads, + * but we do not issue any reads here. We do not want to account for + * writes in this case. + * + * O_DIRECT write/read accounting takes place in + * dmu_{write/read}_abd(). + */ + if (!read && ((flags & DMU_DIRECTIO) == 0)) + zfs_racct_write(dn->dn_objset->os_spa, length, nblks, flags); if (zs) dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE); @@ -897,7 +905,7 @@ dmu_prefetch_dnode(objset_t *os, uint64_t object, zio_priority_t pri) /* * Get the next "chunk" of file data to free. We traverse the file from - * the end so that the file gets shorter over time (if we crashes in the + * the end so that the file gets shorter over time (if we crash in the * middle, this will leave us in a better state). We find allocated file * data by simply searching the allocated level 1 indirects. * @@ -1168,7 +1176,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, /* * Deal with odd block sizes, where there can't be data past the first - * block. If we ever do the tail block optimization, we will need to + * block. If we ever do the tail block optimization, we will need to * handle that here as well. */ if (dn->dn_maxblkid == 0) { @@ -1178,6 +1186,18 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, size = newsz; } + if (size == 0) + return (0); + + /* Allow Direct I/O when requested and properly aligned */ + if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned(buf) && + zfs_dio_aligned(offset, size, PAGESIZE)) { + abd_t *data = abd_get_from_buf(buf, size); + err = dmu_read_abd(dn, offset, size, data, flags); + abd_free(data); + return (err); + } + while (size > 0) { uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); int i; @@ -1286,22 +1306,41 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } /* - * Note: Lustre is an external consumer of this interface. 
+ * This interface is not used internally by ZFS but is provided for + * use by Lustre which is built on the DMU interfaces. */ -void -dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, - const void *buf, dmu_tx_t *tx) +int +dmu_write_by_dnode_flags(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx, uint32_t flags) { dmu_buf_t **dbp; int numbufs; + int error; if (size == 0) - return; + return (0); + + /* Allow Direct I/O when requested and properly aligned */ + if ((flags & DMU_DIRECTIO) && zfs_dio_page_aligned((void *)buf) && + zfs_dio_aligned(offset, size, dn->dn_datablksz)) { + abd_t *data = abd_get_from_buf((void *)buf, size); + error = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx); + abd_free(data); + return (error); + } VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size, FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH)); dmu_write_impl(dbp, numbufs, offset, size, buf, tx); dmu_buf_rele_array(dbp, numbufs, FTAG); + return (0); +} + +int +dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, + const void *buf, dmu_tx_t *tx) +{ + return (dmu_write_by_dnode_flags(dn, offset, size, buf, tx, 0)); } void @@ -1365,6 +1404,9 @@ dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size) dmu_buf_t **dbp; int numbufs, i, err; + if (uio->uio_extflg & UIO_DIRECT) + return (dmu_read_uio_direct(dn, uio, size)); + /* * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. @@ -1453,23 +1495,53 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) dmu_buf_t **dbp; int numbufs; int err = 0; - int i; + uint64_t write_size; - err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), size, +top: + write_size = size; + + /* + * We only allow Direct I/O writes to happen if we are block + * sized aligned. Otherwise, we pass the write off to the ARC. 
+ */ + if ((uio->uio_extflg & UIO_DIRECT) && + (write_size >= dn->dn_datablksz)) { + if (zfs_dio_aligned(zfs_uio_offset(uio), write_size, + dn->dn_datablksz)) { + return (dmu_write_uio_direct(dn, uio, size, tx)); + } else if (write_size > dn->dn_datablksz && + zfs_dio_offset_aligned(zfs_uio_offset(uio), + dn->dn_datablksz)) { + write_size = + dn->dn_datablksz * (write_size / dn->dn_datablksz); + err = dmu_write_uio_direct(dn, uio, write_size, tx); + if (err == 0) { + size -= write_size; + goto top; + } else { + return (err); + } + } else { + write_size = + P2PHASE(zfs_uio_offset(uio), dn->dn_datablksz); + } + } + + err = dmu_buf_hold_array_by_dnode(dn, zfs_uio_offset(uio), write_size, FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (err) return (err); - for (i = 0; i < numbufs; i++) { + for (int i = 0; i < numbufs; i++) { uint64_t tocpy; int64_t bufoff; dmu_buf_t *db = dbp[i]; - ASSERT(size > 0); + ASSERT(write_size > 0); offset_t off = zfs_uio_offset(uio); bufoff = off - db->db_offset; - tocpy = MIN(db->db_size - bufoff, size); + tocpy = MIN(db->db_size - bufoff, write_size); ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); @@ -1489,10 +1561,18 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) if (err) break; + write_size -= tocpy; size -= tocpy; } + IMPLY(err == 0, write_size == 0); + dmu_buf_rele_array(dbp, numbufs, FTAG); + + if ((uio->uio_extflg & UIO_DIRECT) && size > 0) { + goto top; + } + return (err); } @@ -1731,7 +1811,7 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, * same size as the dbuf. 
*/ if (offset == db->db.db_offset && blksz == db->db.db_size) { - zfs_racct_write(blksz, 1); + zfs_racct_write(os->os_spa, blksz, 1, 0); dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { @@ -1761,23 +1841,22 @@ dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, return (err); } -typedef struct { - dbuf_dirty_record_t *dsa_dr; - dmu_sync_cb_t *dsa_done; - zgd_t *dsa_zgd; - dmu_tx_t *dsa_tx; -} dmu_sync_arg_t; - -static void +void dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) { (void) buf; dmu_sync_arg_t *dsa = varg; - dmu_buf_t *db = dsa->dsa_zgd->zgd_db; - blkptr_t *bp = zio->io_bp; if (zio->io_error == 0) { + dbuf_dirty_record_t *dr = dsa->dsa_dr; + blkptr_t *bp = zio->io_bp; + if (BP_IS_HOLE(bp)) { + dmu_buf_t *db = NULL; + if (dr) + db = &(dr->dr_dbuf->db); + else + db = dsa->dsa_zgd->zgd_db; /* * A block of zeros may compress to a hole, but the * block size still needs to be known for replay. @@ -1796,7 +1875,7 @@ dmu_sync_late_arrival_ready(zio_t *zio) dmu_sync_ready(zio, NULL, zio->io_private); } -static void +void dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) { (void) buf; @@ -1809,7 +1888,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) * Record the vdev(s) backing this blkptr so they can be flushed after * the writes for the lwb have completed. 
*/ - if (zio->io_error == 0) { + if (zgd && zio->io_error == 0) { zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); } @@ -1848,10 +1927,12 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) } else { dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; } + cv_broadcast(&db->db_changed); mutex_exit(&db->db_mtx); - dsa->dsa_done(dsa->dsa_zgd, zio->io_error); + if (dsa->dsa_done) + dsa->dsa_done(dsa->dsa_zgd, zio->io_error); kmem_free(dsa, sizeof (*dsa)); } @@ -2120,9 +2201,10 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dsa->dsa_tx = NULL; zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp, - dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db), - &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa, - ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); + dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), + dbuf_is_l2cacheable(db, NULL), &zp, dmu_sync_ready, NULL, + dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, + &zb)); return (0); } @@ -2385,6 +2467,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) zp->zp_nopwrite = nopwrite; zp->zp_encrypt = encrypt; zp->zp_byteorder = ZFS_HOST_BYTEORDER; + zp->zp_direct_write = (wp & WP_DIRECT_WR) ? 
B_TRUE : B_FALSE; memset(zp->zp_salt, 0, ZIO_DATA_SALT_LEN); memset(zp->zp_iv, 0, ZIO_DATA_IV_LEN); memset(zp->zp_mac, 0, ZIO_DATA_MAC_LEN); @@ -2594,7 +2677,7 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length, ASSERT(db->db_blkid != DMU_SPILL_BLKID); ASSERT(BP_IS_HOLE(bp) || dbuf->db_size == BP_GET_LSIZE(bp)); - dmu_buf_will_clone(dbuf, tx); + dmu_buf_will_clone_or_dio(dbuf, tx); mutex_enter(&db->db_mtx); @@ -2817,8 +2900,15 @@ EXPORT_SYMBOL(dmu_free_long_range); EXPORT_SYMBOL(dmu_free_long_object); EXPORT_SYMBOL(dmu_read); EXPORT_SYMBOL(dmu_read_by_dnode); +EXPORT_SYMBOL(dmu_read_uio); +EXPORT_SYMBOL(dmu_read_uio_dbuf); +EXPORT_SYMBOL(dmu_read_uio_dnode); EXPORT_SYMBOL(dmu_write); EXPORT_SYMBOL(dmu_write_by_dnode); +EXPORT_SYMBOL(dmu_write_by_dnode_flags); +EXPORT_SYMBOL(dmu_write_uio); +EXPORT_SYMBOL(dmu_write_uio_dbuf); +EXPORT_SYMBOL(dmu_write_uio_dnode); EXPORT_SYMBOL(dmu_prealloc); EXPORT_SYMBOL(dmu_object_info); EXPORT_SYMBOL(dmu_object_info_from_dnode); diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c new file mode 100644 index 000000000000..91a7fd8df464 --- /dev/null +++ b/module/zfs/dmu_direct.c @@ -0,0 +1,395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +static abd_t * +make_abd_for_dbuf(dmu_buf_impl_t *db, abd_t *data, uint64_t offset, + uint64_t size) +{ + size_t buf_size = db->db.db_size; + abd_t *pre_buf = NULL, *post_buf = NULL, *mbuf = NULL; + size_t buf_off = 0; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (offset > db->db.db_offset) { + size_t pre_size = offset - db->db.db_offset; + pre_buf = abd_alloc_for_io(pre_size, B_TRUE); + buf_size -= pre_size; + buf_off = 0; + } else { + buf_off = db->db.db_offset - offset; + size -= buf_off; + } + + if (size < buf_size) { + size_t post_size = buf_size - size; + post_buf = abd_alloc_for_io(post_size, B_TRUE); + buf_size -= post_size; + } + + ASSERT3U(buf_size, >, 0); + abd_t *buf = abd_get_offset_size(data, buf_off, buf_size); + + if (pre_buf || post_buf) { + mbuf = abd_alloc_gang(); + if (pre_buf) + abd_gang_add(mbuf, pre_buf, B_TRUE); + abd_gang_add(mbuf, buf, B_TRUE); + if (post_buf) + abd_gang_add(mbuf, post_buf, B_TRUE); + } else { + mbuf = buf; + } + + return (mbuf); +} + +static void +dmu_read_abd_done(zio_t *zio) +{ + abd_free(zio->io_abd); +} + +static void +dmu_write_direct_ready(zio_t *zio) +{ + dmu_sync_ready(zio, NULL, zio->io_private); +} + +static void +dmu_write_direct_done(zio_t *zio) +{ + dmu_sync_arg_t *dsa = zio->io_private; + dbuf_dirty_record_t *dr = dsa->dsa_dr; + dmu_buf_impl_t *db = dr->dr_dbuf; + + abd_free(zio->io_abd); + + mutex_enter(&db->db_mtx); + ASSERT3P(db->db_buf, ==, NULL); + ASSERT3P(dr->dt.dl.dr_data, ==, NULL); + ASSERT3P(db->db.db_data, ==, NULL); + db->db_state = DB_UNCACHED; + mutex_exit(&db->db_mtx); + + dmu_sync_done(zio, NULL, zio->io_private); + + if (zio->io_error != 0) { + if (zio->io_flags & 
ZIO_FLAG_DIO_CHKSUM_ERR) + ASSERT3U(zio->io_error, ==, EIO); + + /* + * In the event of an I/O error this block has been freed in + * zio_done() through zio_dva_unallocate(). Calling + * dmu_sync_done() above set dr_override_state to + * DR_NOT_OVERRIDDEN. In this case when dbuf_undirty() calls + * dbuf_unoverride(), it will skip doing zio_free() to free + * this block as that was already taken care of. + * + * Since we are undirtying the record in open-context, we must + * have a hold on the db, so it should never be evicted after + * calling dbuf_undirty(). + */ + mutex_enter(&db->db_mtx); + VERIFY3B(dbuf_undirty(db, dsa->dsa_tx), ==, B_FALSE); + mutex_exit(&db->db_mtx); + } + + kmem_free(zio->io_bp, sizeof (blkptr_t)); + zio->io_bp = NULL; +} + +int +dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx) +{ + objset_t *os = db->db_objset; + dsl_dataset_t *ds = dmu_objset_ds(os); + zbookmark_phys_t zb; + dbuf_dirty_record_t *dr_head; + + SET_BOOKMARK(&zb, ds->ds_object, + db->db.db_object, db->db_level, db->db_blkid); + + DB_DNODE_ENTER(db); + zio_prop_t zp; + dmu_write_policy(os, DB_DNODE(db), db->db_level, + WP_DMU_SYNC | WP_DIRECT_WR, &zp); + DB_DNODE_EXIT(db); + + /* + * Dirty this dbuf with DB_NOFILL since we will not have any data + * associated with the dbuf. + */ + dmu_buf_will_clone_or_dio(&db->db, tx); + + mutex_enter(&db->db_mtx); + + uint64_t txg = dmu_tx_get_txg(tx); + ASSERT3U(txg, >, spa_last_synced_txg(os->os_spa)); + ASSERT3U(txg, >, spa_syncing_txg(os->os_spa)); + + dr_head = list_head(&db->db_dirty_records); + ASSERT3U(dr_head->dr_txg, ==, txg); + dr_head->dt.dl.dr_diowrite = B_TRUE; + dr_head->dr_accounted = db->db.db_size; + + blkptr_t *bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); + if (db->db_blkptr != NULL) { + /* + * Fill in bp with the current block pointer so that + * the nopwrite code can check if we're writing the same + * data that's already on disk. 
+ */ + *bp = *db->db_blkptr; + } else { + memset(bp, 0, sizeof (blkptr_t)); + } + + /* + * Disable nopwrite if the current block pointer could change + * before this TXG syncs. + */ + if (list_next(&db->db_dirty_records, dr_head) != NULL) + zp.zp_nopwrite = B_FALSE; + + ASSERT3S(dr_head->dt.dl.dr_override_state, ==, DR_NOT_OVERRIDDEN); + dr_head->dt.dl.dr_override_state = DR_IN_DMU_SYNC; + + mutex_exit(&db->db_mtx); + + dmu_objset_willuse_space(os, dr_head->dr_accounted, tx); + + dmu_sync_arg_t *dsa = kmem_zalloc(sizeof (dmu_sync_arg_t), KM_SLEEP); + dsa->dsa_dr = dr_head; + dsa->dsa_tx = tx; + + zio_t *zio = zio_write(pio, os->os_spa, txg, bp, data, + db->db.db_size, db->db.db_size, &zp, + dmu_write_direct_ready, NULL, dmu_write_direct_done, dsa, + ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb); + + if (pio == NULL) + return (zio_wait(zio)); + + zio_nowait(zio); + + return (0); +} + +int +dmu_write_abd(dnode_t *dn, uint64_t offset, uint64_t size, + abd_t *data, uint32_t flags, dmu_tx_t *tx) +{ + dmu_buf_t **dbp; + spa_t *spa = dn->dn_objset->os_spa; + int numbufs, err; + + ASSERT(flags & DMU_DIRECTIO); + + err = dmu_buf_hold_array_by_dnode(dn, offset, + size, B_FALSE, FTAG, &numbufs, &dbp, flags); + if (err) + return (err); + + zio_t *pio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + + for (int i = 0; i < numbufs && err == 0; i++) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; + + abd_t *abd = abd_get_offset_size(data, + db->db.db_offset - offset, dn->dn_datablksz); + + zfs_racct_write(spa, db->db.db_size, 1, flags); + err = dmu_write_direct(pio, db, abd, tx); + ASSERT0(err); + } + + err = zio_wait(pio); + + /* + * The dbuf must be held until the Direct I/O write has completed in + * the event there was any errors and dbuf_undirty() was called. 
+ */ + dmu_buf_rele_array(dbp, numbufs, FTAG); + + return (err); +} + +int +dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size, + abd_t *data, uint32_t flags) +{ + objset_t *os = dn->dn_objset; + spa_t *spa = os->os_spa; + dmu_buf_t **dbp; + int numbufs, err; + + ASSERT(flags & DMU_DIRECTIO); + + err = dmu_buf_hold_array_by_dnode(dn, offset, + size, B_FALSE, FTAG, &numbufs, &dbp, flags); + if (err) + return (err); + + zio_t *rio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); + + for (int i = 0; i < numbufs; i++) { + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; + abd_t *mbuf; + zbookmark_phys_t zb; + blkptr_t *bp; + + mutex_enter(&db->db_mtx); + + SET_BOOKMARK(&zb, dmu_objset_ds(os)->ds_object, + db->db.db_object, db->db_level, db->db_blkid); + + /* + * If there is another read for this dbuf, we will wait for + * that to complete first before checking the db_state below. + */ + while (db->db_state == DB_READ) + cv_wait(&db->db_changed, &db->db_mtx); + + err = dmu_buf_get_bp_from_dbuf(db, &bp); + if (err) { + mutex_exit(&db->db_mtx); + goto error; + } + + /* + * There is no need to read if this is a hole or the data is + * cached. This will not be considered a direct read for IO + * accounting in the same way that an ARC hit is not counted. + */ + if (bp == NULL || BP_IS_HOLE(bp) || db->db_state == DB_CACHED) { + size_t aoff = offset < db->db.db_offset ? + db->db.db_offset - offset : 0; + size_t boff = offset > db->db.db_offset ? + offset - db->db.db_offset : 0; + size_t len = MIN(size - aoff, db->db.db_size - boff); + + if (db->db_state == DB_CACHED) { + /* + * We need to untransform the ARC buf data + * before we copy it over.
+ */ + err = dmu_buf_untransform_direct(db, spa); + ASSERT0(err); + abd_copy_from_buf_off(data, + (char *)db->db.db_data + boff, aoff, len); + } else { + abd_zero_off(data, aoff, len); + } + + mutex_exit(&db->db_mtx); + continue; + } + + mbuf = make_abd_for_dbuf(db, data, offset, size); + ASSERT3P(mbuf, !=, NULL); + + /* + * The dbuf mutex (db_mtx) must be held when creating the ZIO + * for the read. The BP returned from + * dmu_buf_get_bp_from_dbuf() could be from a pending block + * clone or a yet to be synced Direct I/O write that is in the + * dbuf's dirty record. When zio_read() is called, zio_create() + * will make a copy of the BP. However, if zio_read() is called + * without the mutex being held then the dirty record from the + * dbuf could be freed in dbuf_write_done() resulting in garbage + * being set for the zio BP. + */ + zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size, + dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL, &zb); + mutex_exit(&db->db_mtx); + + zfs_racct_read(spa, db->db.db_size, 1, flags); + zio_nowait(cio); + } + + dmu_buf_rele_array(dbp, numbufs, FTAG); + + return (zio_wait(rio)); + +error: + dmu_buf_rele_array(dbp, numbufs, FTAG); + (void) zio_wait(rio); + return (err); +} + +#ifdef _KERNEL +int +dmu_read_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size) +{ + offset_t offset = zfs_uio_offset(uio); + offset_t page_index = (offset - zfs_uio_soffset(uio)) >> PAGESHIFT; + int err; + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3U(page_index, <, uio->uio_dio.npages); + + abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index], + offset & (PAGESIZE - 1), size); + err = dmu_read_abd(dn, offset, size, data, DMU_DIRECTIO); + abd_free(data); + + if (err == 0) + zfs_uioskip(uio, size); + + return (err); +} + +int +dmu_write_uio_direct(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) +{ + offset_t offset = zfs_uio_offset(uio); + offset_t page_index = (offset - zfs_uio_soffset(uio)) >> 
PAGESHIFT; + int err; + + ASSERT(uio->uio_extflg & UIO_DIRECT); + ASSERT3U(page_index, <, uio->uio_dio.npages); + + abd_t *data = abd_alloc_from_pages(&uio->uio_dio.pages[page_index], + offset & (PAGESIZE - 1), size); + err = dmu_write_abd(dn, offset, size, data, DMU_DIRECTIO, tx); + abd_free(data); + + if (err == 0) + zfs_uioskip(uio, size); + + return (err); +} +#endif /* _KERNEL */ + +EXPORT_SYMBOL(dmu_read_uio_direct); +EXPORT_SYMBOL(dmu_write_uio_direct); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 8f4fefa4f4dd..f030fba22669 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -350,6 +350,20 @@ smallblk_changed_cb(void *arg, uint64_t newval) os->os_zpl_special_smallblock = newval; } +static void +direct_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval == ZFS_DIRECT_DISABLED || newval == ZFS_DIRECT_STANDARD || + newval == ZFS_DIRECT_ALWAYS); + + os->os_direct = newval; +} + static void logbias_changed_cb(void *arg, uint64_t newval) { @@ -633,6 +647,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ZFS_PROP_SPECIAL_SMALL_BLOCKS), smallblk_changed_cb, os); } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_DIRECT), + direct_changed_cb, os); + } } if (err != 0) { arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf); diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 17ed2a620b1e..45a2f06263a0 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -895,6 +895,14 @@ static const spa_iostats_t spa_iostats_template = { { "simple_trim_bytes_skipped", KSTAT_DATA_UINT64 }, { "simple_trim_extents_failed", KSTAT_DATA_UINT64 }, { "simple_trim_bytes_failed", KSTAT_DATA_UINT64 }, + { "arc_read_count", KSTAT_DATA_UINT64 }, + { "arc_read_bytes", KSTAT_DATA_UINT64 }, + { "arc_write_count", KSTAT_DATA_UINT64 }, + { "arc_write_bytes", KSTAT_DATA_UINT64 }, + { 
"direct_read_count", KSTAT_DATA_UINT64 }, + { "direct_read_bytes", KSTAT_DATA_UINT64 }, + { "direct_write_count", KSTAT_DATA_UINT64 }, + { "direct_write_bytes", KSTAT_DATA_UINT64 }, }; #define SPA_IOSTATS_ADD(stat, val) \ @@ -938,6 +946,44 @@ spa_iostats_trim_add(spa_t *spa, trim_type_t type, } } +void +spa_iostats_read_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + spa_history_kstat_t *shk = &spa->spa_stats.iostats; + kstat_t *ksp = shk->kstat; + + if (ksp == NULL) + return; + + spa_iostats_t *iostats = ksp->ks_data; + if (flags & DMU_DIRECTIO) { + SPA_IOSTATS_ADD(direct_read_count, iops); + SPA_IOSTATS_ADD(direct_read_bytes, size); + } else { + SPA_IOSTATS_ADD(arc_read_count, iops); + SPA_IOSTATS_ADD(arc_read_bytes, size); + } +} + +void +spa_iostats_write_add(spa_t *spa, uint64_t size, uint64_t iops, uint32_t flags) +{ + spa_history_kstat_t *shk = &spa->spa_stats.iostats; + kstat_t *ksp = shk->kstat; + + if (ksp == NULL) + return; + + spa_iostats_t *iostats = ksp->ks_data; + if (flags & DMU_DIRECTIO) { + SPA_IOSTATS_ADD(direct_write_count, iops); + SPA_IOSTATS_ADD(direct_write_bytes, size); + } else { + SPA_IOSTATS_ADD(arc_write_count, iops); + SPA_IOSTATS_ADD(arc_write_bytes, size); + } +} + static int spa_iostats_update(kstat_t *ksp, int rw) { diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 6ae0a14127bf..9305bd894d6f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -117,6 +117,11 @@ static unsigned int zfs_slow_io_events_per_second = 20; */ static unsigned int zfs_deadman_events_per_second = 1; +/* + * Rate limit direct write IO verify failures to this many per scond. + */ +static unsigned int zfs_dio_write_verify_events_per_second = 20; + /* * Rate limit checksum events after this many checksum errors per second. */ @@ -153,6 +158,17 @@ int zfs_nocacheflush = 0; uint_t zfs_vdev_max_auto_ashift = 14; uint_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; +/* + * VDEV checksum verification for Direct I/O writes. 
This is necessary for + * Linux, because anonymous pages can not be placed under write protection + * during Direct I/O writes. + */ +#if !defined(__FreeBSD__) +uint_t zfs_vdev_direct_write_verify = 1; +#else +uint_t zfs_vdev_direct_write_verify = 0; +#endif + void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) { @@ -673,6 +689,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) 1); zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_deadman_events_per_second, 1); + zfs_ratelimit_init(&vd->vdev_dio_verify_rl, + &zfs_dio_write_verify_events_per_second, 1); zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksum_events_per_second, 1); @@ -1182,6 +1200,7 @@ vdev_free(vdev_t *vd) zfs_ratelimit_fini(&vd->vdev_delay_rl); zfs_ratelimit_fini(&vd->vdev_deadman_rl); + zfs_ratelimit_fini(&vd->vdev_dio_verify_rl); zfs_ratelimit_fini(&vd->vdev_checksum_rl); if (vd == spa->spa_root_vdev) @@ -4475,6 +4494,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_stat.vs_read_errors = 0; vd->vdev_stat.vs_write_errors = 0; vd->vdev_stat.vs_checksum_errors = 0; + vd->vdev_stat.vs_dio_verify_errors = 0; vd->vdev_stat.vs_slow_ios = 0; for (int c = 0; c < vd->vdev_children; c++) @@ -6503,7 +6523,14 @@ ZFS_MODULE_PARAM(zfs, zfs_, slow_io_events_per_second, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, deadman_events_per_second, UINT, ZMOD_RW, "Rate limit hung IO (deadman) events to this many per second"); +ZFS_MODULE_PARAM(zfs, zfs_, dio_write_verify_events_per_second, UINT, ZMOD_RW, + "Rate Direct I/O write verify events to this many per second"); + /* BEGIN CSTYLED */ +ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify, UINT, ZMOD_RW, + "Direct I/O writes will perform for checksum verification before " + "commiting write"); + ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW, "Rate limit checksum events to this many checksum errors per second " "(do not set below ZED threshold)."); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index
47346dd5acff..9d12bc2eb0a2 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -387,6 +387,10 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv) /* IO delays */ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SLOW_IOS, vs->vs_slow_ios); + /* Direct I/O write verify errors */ + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_DIO_VERIFY_ERRORS, + vs->vs_dio_verify_errors); + /* Add extended stats nvlist to main nvlist */ fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index f7cecc9af8a4..25b05abd3650 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -595,6 +595,8 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, vs->vs_checksum_errors, FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS, DATA_TYPE_UINT64, vs->vs_slow_ios, + FM_EREPORT_PAYLOAD_ZFS_VDEV_DIO_VERIFY_ERRORS, + DATA_TYPE_UINT64, vs->vs_dio_verify_errors, NULL); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 53366ad49781..e69b98896a28 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -160,7 +160,6 @@ #include #include #include -#include #include #include #include diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 399f5a0117bb..8d0aebbec1db 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -607,7 +607,7 @@ static int64_t zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t resid, boolean_t commit, - zil_callback_t callback, void *callback_data) + boolean_t o_direct, zil_callback_t callback, void *callback_data) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); uint32_t blocksize = zp->z_blksz; @@ -622,7 +622,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, return; } - if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT || o_direct) write_state = WR_INDIRECT; else if 
(!spa_has_slogs(zilog->zl_spa) && resid >= zfs_immediate_write_sz) diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index f3db953eab46..f9cc5b0109d9 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -74,6 +73,14 @@ int zfs_bclone_enabled = 1; */ static int zfs_bclone_wait_dirty = 0; +/* + * Enable Direct I/O. If this setting is 0, then all I/O requests will be + * directed through the ARC acting as though the dataset property direct was + * set to disabled. + */ +static int zfs_dio_enabled = 1; + + /* * Maximum bytes to read per chunk in zfs_read(). */ @@ -202,6 +209,77 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); } +/* + * Determine if Direct I/O has been requested (either via the O_DIRECT flag or + * the "direct" dataset property). When inherited by the property only apply + * the O_DIRECT flag to correctly aligned IO requests. The rationale for this + * is it allows the property to be safely set on a dataset without forcing + * all of the applications to be aware of the alignment restrictions. When + * O_DIRECT is explicitly requested by an application return EINVAL if the + * request is unaligned. In all cases, if the range for this request has + * been mmap'ed then we will perform buffered I/O to keep the mapped region + * synchronized with the ARC. + * + * It is possible that a file's pages could be mmap'ed after it is checked + * here. If so, that is handled accordingly in zfs_write().
See comments in the + * following area for how this is handled: + * zfs_write() -> update_pages() + */ +static int +zfs_setup_direct(struct znode *zp, zfs_uio_t *uio, zfs_uio_rw_t rw, + int *ioflagp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + objset_t *os = zfsvfs->z_os; + int ioflag = *ioflagp; + int error = 0; + + if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED || + zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) { + /* + * Direct I/O is disabled or the region is mmap'ed. In either + * case the I/O request will just be directed through the ARC. + */ + ioflag &= ~O_DIRECT; + goto out; + } else if (os->os_direct == ZFS_DIRECT_ALWAYS && + zfs_uio_page_aligned(uio) && + zfs_uio_aligned(uio, PAGE_SIZE)) { + if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) || + (rw == UIO_READ)) { + ioflag |= O_DIRECT; + } + } else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) { + /* + * Direct I/O was requested through the direct=always, but it + * is not properly PAGE_SIZE aligned. The request will be + * directed through the ARC. + */ + ioflag &= ~O_DIRECT; + } + + if (ioflag & O_DIRECT) { + if (!zfs_uio_page_aligned(uio) || + !zfs_uio_aligned(uio, PAGE_SIZE)) { + error = SET_ERROR(EINVAL); + goto out; + } + + error = zfs_uio_get_dio_pages_alloc(uio, rw); + if (error) { + goto out; + } + } + + IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT); + ASSERT0(error); + +out: + *ioflagp = ioflag; + return (error); +} + /* * Read bytes from specified file into supplied buffer. * @@ -286,24 +364,58 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = 0; goto out; } - ASSERT(zfs_uio_offset(uio) < zp->z_size); + + /* + * Setting up Direct I/O if requested.
+ */ + error = zfs_setup_direct(zp, uio, UIO_READ, &ioflag); + if (error) { + goto out; + } + #if defined(__linux__) ssize_t start_offset = zfs_uio_offset(uio); #endif + ssize_t chunk_size = zfs_vnops_read_chunk_size; ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio)); ssize_t start_resid = n; + ssize_t dio_remaining_resid = 0; + + if (uio->uio_extflg & UIO_DIRECT) { + /* + * All pages for an O_DIRECT request have already been mapped + * so there's no compelling reason to handle this uio in + * smaller chunks. + */ + chunk_size = DMU_MAX_ACCESS; + + /* + * In the event that the O_DIRECT request is reading the entire + * file, it is possible file's length is not page sized + * aligned. However, lower layers expect that the Direct I/O + * request is page-aligned. In this case, as much of the file + * that can be read using Direct I/O happens and the remaining + * amount will be read through the ARC. + * + * This is still consistent with the semantics of Direct I/O in + * ZFS as at a minimum the I/O request must be page-aligned.
+ */ + dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t); + if (dio_remaining_resid != 0) + n -= dio_remaining_resid; + } while (n > 0) { - ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size - - P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size)); + ssize_t nbytes = MIN(n, chunk_size - + P2PHASE(zfs_uio_offset(uio), chunk_size)); #ifdef UIO_NOCOPY if (zfs_uio_segflg(uio) == UIO_NOCOPY) error = mappedread_sf(zp, nbytes, uio); else #endif if (zn_has_cached_data(zp, zfs_uio_offset(uio), - zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) { + zfs_uio_offset(uio) + nbytes - 1)) { error = mappedread(zp, nbytes, uio); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), @@ -332,12 +444,40 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) n -= nbytes; } + if (error == 0 && (uio->uio_extflg & UIO_DIRECT) && + dio_remaining_resid != 0) { + /* + * Temporarily remove the UIO_DIRECT flag from the UIO so the + * remainder of the file can be read using the ARC. + */ + uio->uio_extflg &= ~UIO_DIRECT; + + if (zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + dio_remaining_resid - 1)) { + error = mappedread(zp, dio_remaining_resid, uio); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio, + dio_remaining_resid); + } + uio->uio_extflg |= UIO_DIRECT; + + if (error != 0) + n += dio_remaining_resid; + } else if (error && (uio->uio_extflg & UIO_DIRECT)) { + n += dio_remaining_resid; + } int64_t nread = start_resid - n; + dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); - task_io_account_read(nread); out: zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. 
+ */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_READ); + ZFS_ACCESSTIME_STAMP(zfsvfs, zp); zfs_exit(zfsvfs, FTAG); return (error); @@ -422,6 +562,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) int error = 0, error1; ssize_t start_resid = zfs_uio_resid(uio); uint64_t clear_setid_bits_txg = 0; + boolean_t o_direct_defer = B_FALSE; /* * Fasttrack empty write @@ -474,6 +615,15 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) return (SET_ERROR(EINVAL)); } + /* + * Setting up Direct I/O if requested. + */ + error = zfs_setup_direct(zp, uio, UIO_WRITE, &ioflag); + if (error) { + zfs_exit(zfsvfs, FTAG); + return (SET_ERROR(error)); + } + /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. @@ -504,6 +654,12 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) woff = zp->z_size; } zfs_uio_setoffset(uio, woff); + /* + * We need to update the starting offset as well because it is + * set previously in the ZPL (Linux) and VNOPS (FreeBSD) + * layers. + */ + zfs_uio_setsoffset(uio, woff); } else { /* * Note that if the file block size will change as a result of @@ -539,6 +695,33 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) const uint64_t gid = KGID_TO_SGID(ZTOGID(zp)); const uint64_t projid = zp->z_projid; + /* + * In the event we are increasing the file block size + * (lr_length == UINT64_MAX), we will direct the write to the ARC. + * Because zfs_grow_blocksize() will read from the ARC in order to + * grow the dbuf, we avoid doing Direct I/O here as that would cause + * data written to disk to be overwritten by data in the ARC during + * the sync phase. Besides writing data twice to disk, we also + * want to avoid consistency concerns between data in the ARC and + * on disk while growing the file's blocksize. + * + * We will only temporarily remove Direct I/O and put it back after + * we have grown the blocksize.
We do this in the event a request + * is larger than max_blksz, so further requests to + * dmu_write_uio_dbuf() will still issue the requests using Direct + * I/O. + * + * As an example: + * The first block of a file is being written as a 4k request with + * a recordsize of 1K. The first 1K issued in the loop below will go + * through the ARC; however, the following 3 1K requests will + * use Direct I/O. + */ + if (uio->uio_extflg & UIO_DIRECT && lr->lr_length == UINT64_MAX) { + uio->uio_extflg &= ~UIO_DIRECT; + o_direct_defer = B_TRUE; + } + /* * Write the file in reasonable size chunks. Each chunk is written * in a separate transaction; this keeps the intent log records small @@ -580,6 +763,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) ssize_t nbytes = n; if (n >= blksz && woff >= zp->z_size && P2PHASE(woff, blksz) == 0 && + !(uio->uio_extflg & UIO_DIRECT) && (blksz >= SPA_OLD_MAXBLOCKSIZE || n < 4 * blksz)) { /* * This write covers a full block. "Borrow" a buffer @@ -705,9 +889,30 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uioskip(uio, nbytes); tx_bytes = nbytes; } + /* + * There is a window where a file's pages can be mmap'ed after + * zfs_setup_direct() is called. This is due to the fact that + * the rangelock in this function is acquired after calling + * zfs_setup_direct(). This is done so that + * zfs_uio_prefaultpages() does not attempt to fault in pages + * on Linux for Direct I/O requests. This is not necessary as + * the pages are pinned in memory and can not be faulted out. + * Ideally, the rangelock would be held before calling + * zfs_setup_direct() and zfs_uio_prefaultpages(); however, + * this can lead to a deadlock as zfs_getpage() also acquires + * the rangelock as a RL_WRITER and prefaulting the pages can + * lead to zfs_getpage() being called.
+ * + * In the case of the pages being mapped after + * zfs_setup_direct() is called, the call to update_pages() + * will still be made to make sure there is consistency between + * the ARC and the Linux page cache. This is an unfortunate + * situation as the data will be read back into the ARC after + * the Direct I/O write has completed, but this is the penalty + * for writing to a mmap'ed region of a file using Direct I/O. + */ if (tx_bytes && - zn_has_cached_data(zp, woff, woff + tx_bytes - 1) && - !(ioflag & O_DIRECT)) { + zn_has_cached_data(zp, woff, woff + tx_bytes - 1)) { update_pages(zp, woff, tx_bytes, zfsvfs->z_os); } @@ -756,10 +961,21 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * the TX_WRITE records logged here. */ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit, - NULL, NULL); + uio->uio_extflg & UIO_DIRECT ? B_TRUE : B_FALSE, NULL, + NULL); dmu_tx_commit(tx); + /* + * Direct I/O was deferred in order to grow the first block. + * At this point it can be re-enabled for subsequent writes. + */ + if (o_direct_defer) { + ASSERT(ioflag & O_DIRECT); + uio->uio_extflg |= UIO_DIRECT; + o_direct_defer = B_FALSE; + } + if (error != 0) break; ASSERT3S(tx_bytes, ==, nbytes); @@ -767,9 +983,21 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) pfbytes -= nbytes; } + if (o_direct_defer) { + ASSERT(ioflag & O_DIRECT); + uio->uio_extflg |= UIO_DIRECT; + o_direct_defer = B_FALSE; + } + zfs_znode_update_vfs(zp); zfs_rangelock_exit(lr); + /* + * Cleanup for Direct I/O if requested. + */ + if (uio->uio_extflg & UIO_DIRECT) + zfs_uio_free_dio_pages(uio, UIO_WRITE); + /* * If we're in replay mode, or we made no progress, or the * uio data is inaccessible return an error.
Otherwise, it's @@ -784,9 +1012,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) if (commit) zil_commit(zilog, zp->z_id); - const int64_t nwritten = start_resid - zfs_uio_resid(uio); + int64_t nwritten = start_resid - zfs_uio_resid(uio); dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); - task_io_account_write(nwritten); zfs_exit(zfsvfs, FTAG); return (0); @@ -846,7 +1073,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; - dmu_buf_t *db; zgd_t *zgd; int error = 0; uint64_t zp_gen; @@ -890,8 +1116,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, - offset, size, RL_READER); + zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, + size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); @@ -929,18 +1155,44 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, zil_fault_io = 0; } #endif + + dmu_buf_t *dbp; if (error == 0) error = dmu_buf_hold_noread(os, object, offset, zgd, - &db); + &dbp); if (error == 0) { - blkptr_t *bp = &lr->lr_blkptr; + zgd->zgd_db = dbp; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp; + boolean_t direct_write = B_FALSE; + mutex_enter(&db->db_mtx); + dbuf_dirty_record_t *dr = + dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg); + if (dr != NULL && dr->dt.dl.dr_diowrite) + direct_write = B_TRUE; + mutex_exit(&db->db_mtx); + + /* + * All Direct I/O writes will have already completed and + * the block pointer can be immediately stored in the + * log record. + */ + if (direct_write) { + /* + * A Direct I/O write always covers an entire + * block. 
+ */ + ASSERT3U(dbp->db_size, ==, zp->z_blksz); + lr->lr_blkptr = dr->dt.dl.dr_overridden_by; + zfs_get_done(zgd, 0); + return (0); + } - zgd->zgd_db = db; + blkptr_t *bp = &lr->lr_blkptr; zgd->zgd_bp = bp; - ASSERT(db->db_offset == offset); - ASSERT(db->db_size == size); + ASSERT3U(dbp->db_offset, ==, offset); + ASSERT3U(dbp->db_size, ==, size); error = dmu_sync(zio, lr->lr_common.lrc_txg, zfs_get_done, zgd); @@ -975,7 +1227,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, return (error); } - static void zfs_get_done(zgd_t *zgd, int error) { @@ -1559,3 +1810,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW, "Wait for dirty blocks when cloning"); + +ZFS_MODULE_PARAM(zfs, zfs_, dio_enabled, INT, ZMOD_RW, + "Enable Direct I/O"); diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 53992931e049..66a8a9fefd8c 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -803,6 +803,12 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait, pio->io_reexecute |= zio->io_reexecute; ASSERT3U(*countp, >, 0); + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { + ASSERT3U(*errorp, ==, EIO); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + } + (*countp)--; if (*countp == 0 && pio->io_stall == countp) { @@ -1282,20 +1288,14 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio_flag_t flags, const zbookmark_phys_t *zb) { zio_t *zio; + enum zio_stage pipeline = zp->zp_direct_write == B_TRUE ? + ZIO_DIRECT_WRITE_PIPELINE : (flags & ZIO_FLAG_DDT_CHILD) ? 
+ ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE; - ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF && - zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && - zp->zp_compress >= ZIO_COMPRESS_OFF && - zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && - DMU_OT_IS_VALID(zp->zp_type) && - zp->zp_level < 32 && - zp->zp_copies > 0 && - zp->zp_copies <= spa_max_replication(spa)); zio = zio_create(pio, spa, txg, bp, data, lsize, psize, done, private, ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, - ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? - ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); + ZIO_STAGE_OPEN, pipeline); zio->io_ready = ready; zio->io_children_ready = children_ready; @@ -1572,6 +1572,19 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, */ pipeline |= ZIO_STAGE_CHECKSUM_VERIFY; pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; + } else if (type == ZIO_TYPE_WRITE && + pio->io_prop.zp_direct_write == B_TRUE) { + /* + * By default we only will verify checksums for Direct I/O + * writes for Linux. FreeBSD is able to place user pages under + * write protection before issuing them to the ZIO pipeline. + * + * Checksum validation errors will only be reported through + * the top-level VDEV, which is set by this child ZIO. 
+ */ + ASSERT3P(bp, !=, NULL); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + pipeline |= ZIO_STAGE_DIO_CHECKSUM_VERIFY; } if (vd->vdev_ops->vdev_op_leaf) { @@ -3104,6 +3117,7 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) zp.zp_nopwrite = B_FALSE; zp.zp_encrypt = gio->io_prop.zp_encrypt; zp.zp_byteorder = gio->io_prop.zp_byteorder; + zp.zp_direct_write = B_FALSE; memset(zp.zp_salt, 0, ZIO_DATA_SALT_LEN); memset(zp.zp_iv, 0, ZIO_DATA_IV_LEN); memset(zp.zp_mac, 0, ZIO_DATA_MAC_LEN); @@ -3577,6 +3591,13 @@ zio_ddt_write(zio_t *zio) ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum); ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override); ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW))); + /* + * Deduplication will not take place for Direct I/O writes. The + * ddt_tree will be emptied in syncing context. Direct I/O writes take + * place in the open-context. Direct I/O write can not attempt to + * modify the ddt_tree while issuing out a write. + */ + ASSERT3B(zio->io_prop.zp_direct_write, ==, B_FALSE); ddt_enter(ddt); dde = ddt_lookup(ddt, bp); @@ -4509,6 +4530,19 @@ zio_vdev_io_assess(zio_t *zio) zio->io_vsd = NULL; } + /* + * If a Direct I/O write checksum verify error has occurred then this + * I/O should not attempt to be issued again. Instead the EIO will + * be returned. 
+ */ + if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) { + ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL); + ASSERT3U(zio->io_error, ==, EIO); + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + return (zio); + } + + if (zio_injection_enabled && zio->io_error == 0) zio->io_error = zio_handle_fault_injection(zio, EIO); @@ -4822,6 +4856,49 @@ zio_checksum_verify(zio_t *zio) return (zio); } +static zio_t * +zio_dio_checksum_verify(zio_t *zio) +{ + zio_t *pio = zio_unique_parent(zio); + int error; + + ASSERT3P(zio->io_vd, !=, NULL); + ASSERT3P(zio->io_bp, !=, NULL); + ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); + ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + ASSERT3B(pio->io_prop.zp_direct_write, ==, B_TRUE); + ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL); + + if (zfs_vdev_direct_write_verify == 0 || zio->io_error != 0) + goto out; + + if ((error = zio_checksum_error(zio, NULL)) != 0) { + zio->io_error = error; + if (error == ECKSUM) { + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_dio_verify_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); + zio->io_error = SET_ERROR(EIO); + zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR; + + /* + * The EIO error must be propagated up to the logical + * parent ZIO in zio_notify_parent() so it can be + * returned to dmu_write_abd(). + */ + zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE; + + (void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY, + zio->io_spa, zio->io_vd, &zio->io_bookmark, + zio, 0); + } + } + +out: + return (zio); +} + + /* * Called by RAID-Z to ensure we don't compute the checksum twice. */ @@ -5152,7 +5229,8 @@ zio_done(zio_t *zio) * device is currently unavailable. 
*/ if (zio->io_error != ECKSUM && zio->io_vd != NULL && - !vdev_is_dead(zio->io_vd)) { + !vdev_is_dead(zio->io_vd) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { int ret = zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0); if (ret != EALREADY) { @@ -5167,6 +5245,7 @@ zio_done(zio_t *zio) if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) && zio == zio->io_logical) { /* * For logical I/O requests, tell the SPA to log the @@ -5188,7 +5267,8 @@ zio_done(zio_t *zio) ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); if (IO_IS_ALLOCATING(zio) && - !(zio->io_flags & ZIO_FLAG_CANFAIL)) { + !(zio->io_flags & ZIO_FLAG_CANFAIL) && + !(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) { if (zio->io_error != ENOSPC) zio->io_reexecute |= ZIO_REEXECUTE_NOW; else @@ -5238,6 +5318,14 @@ zio_done(zio_t *zio) zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND; if (zio->io_reexecute) { + /* + * A Direct I/O write that has a checksum verify error should + * not attempt to reexecute. Instead, EAGAIN should just be + * propagated back up so an attempt can be made to issue the write + * through the ARC. + */ + ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)); + /* * This is a logical I/O that wants to reexecute.
* @@ -5398,6 +5486,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_vdev_io_done, zio_vdev_io_assess, zio_checksum_verify, + zio_dio_checksum_verify, zio_done }; diff --git a/scripts/Makefile.am b/scripts/Makefile.am index b43bf97dbdf4..7d9cef83d2c6 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -28,7 +28,6 @@ endif dist_noinst_DATA += \ %D%/cstyle.pl \ - %D%/enum-extract.pl \ %D%/update_authors.pl \ %D%/zfs2zol-patch.sed \ %D%/zol2zfs-patch.sed diff --git a/scripts/enum-extract.pl b/scripts/enum-extract.pl deleted file mode 100755 index 5dc2e3455145..000000000000 --- a/scripts/enum-extract.pl +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env perl - -my $usage = <) { - # comments - s/\/\*.*\*\///; - if (m/\/\*/) { - while ($_ .= <>) { - last if s/\/\*.*\*\///s; - } - } - - # preprocessor stuff - next if /^#/; - - # find our enum - $in_enum = 1 if s/^\s*enum\s+${enum}(?:\s|$)//; - next unless $in_enum; - - # remove explicit values - s/\s*=[^,]+,/,/g; - - # extract each identifier - while (m/\b([a-z_][a-z0-9_]*)\b/ig) { - print $1, "\n"; - } - - # - # don't exit: there may be multiple versions of the same enum, e.g. - # inside different #ifdef blocks. Let's explicitly return all of - # them and let external tooling deal with it. 
- # - $in_enum = 0 if m/}\s*;/; -} - -exit 0; diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 088e46ce578c..f89a4b3e0aae 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -693,6 +693,14 @@ tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] tags = ['functional', 'delegate'] +[tests/functional/direct] +tests = ['dio_aligned_block', 'dio_async_always', 'dio_async_fio_ioengines', + 'dio_compression', 'dio_dedup', 'dio_encryption', 'dio_grow_block', + 'dio_max_recordsize', 'dio_mixed', 'dio_mmap', 'dio_overwrites', + 'dio_property', 'dio_random', 'dio_recordsize', 'dio_unaligned_block', + 'dio_unaligned_filesize'] +tags = ['functional', 'direct'] + [tests/functional/exec] tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] @@ -735,7 +743,7 @@ pre = tags = ['functional', 'inheritance'] [tests/functional/io] -tests = ['sync', 'psync', 'posixaio', 'mmap'] +tests = ['mmap', 'posixaio', 'psync', 'sync'] tags = ['functional', 'io'] [tests/functional/inuse] diff --git a/tests/runfiles/freebsd.run b/tests/runfiles/freebsd.run index 13696d645850..e1ae0c6b7721 100644 --- a/tests/runfiles/freebsd.run +++ b/tests/runfiles/freebsd.run @@ -30,3 +30,7 @@ tags = ['functional', 'cli_root', 'zfs_jail'] tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive', 'pam_short_password'] tags = ['functional', 'pam'] + +[tests/functional/direct:FreeBSD] +tests = ['dio_write_stable_pages'] +tags = ['functional', 'direct'] diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 5817e649003c..4613c895b0cd 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -102,6 +102,10 @@ tags = ['functional', 'compression'] tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos'] tags = ['functional', 'devices'] +[tests/functional/direct:Linux] +tests = ['dio_write_verify'] +tags = ['functional', 'direct'] + 
[tests/functional/events:Linux] tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill', 'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config', diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore index 0ed0a69eb013..e9e3b8f73e42 100644 --- a/tests/zfs-tests/cmd/.gitignore +++ b/tests/zfs-tests/cmd/.gitignore @@ -16,6 +16,7 @@ /getversion /largest_file /libzfs_input_check +/manipulate_user_buffer /mkbusy /mkfile /mkfiles diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index a8df06c2e990..5250e72f9fa8 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -60,6 +60,8 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/libzfs_input_check libzfs_core.la \ libnvpair.la +scripts_zfs_tests_bin_PROGRAMS += %D%/manipulate_user_buffer +%C%_manipulate_user_buffer_LDADD = -lpthread scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree %C%_mkfile_LDADD = $(LTLIBINTL) diff --git a/tests/zfs-tests/cmd/manipulate_user_buffer.c b/tests/zfs-tests/cmd/manipulate_user_buffer.c new file mode 100644 index 000000000000..714f42200557 --- /dev/null +++ b/tests/zfs-tests/cmd/manipulate_user_buffer.c @@ -0,0 +1,272 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022 by Triad National Security, LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MIN +#define MIN(a, b) ((a) < (b)) ? (a) : (b) +#endif + +static char *outputfile = NULL; +static int blocksize = 131072; /* 128K */ +static int wr_err_expected = 0; +static int numblocks = 100; +static char *execname = NULL; +static int print_usage = 0; +static int randompattern = 0; +static int ofd; +char *buf = NULL; + +typedef struct { + int entire_file_written; +} pthread_args_t; + +static void +usage(void) +{ + (void) fprintf(stderr, + "usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n" + " [-n numblocks] [-p randpattern] [-h help]\n" + "\n" + "Testing whether checksum verify works correctly for O_DIRECT.\n" + "when manipulating the contents of a userspace buffer.\n" + "\n" + " outputfile: File to write to.\n" + " blocksize: Size of each block to write (must be at \n" + " least >= 512).\n" + " wr_err_expected: Whether pwrite() is expected to return EIO\n" + " while manipulating the contents of the\n" + " buffer.\n" + " numblocks: Total number of blocksized blocks to\n" + " write.\n" + " randpattern: Fill data buffer with random data. 
Default\n" + " behavior is to fill the buffer with the \n" + " known data pattern (0xdeadbeef).\n" + " help: Print usage information and exit.\n" + "\n" + " Required parameters:\n" + " outputfile\n" + "\n" + " Default Values:\n" + " blocksize -> 131072\n" + " wr_err_expexted -> false\n" + " numblocks -> 100\n" + " randpattern -> false\n", + execname); + (void) exit(1); +} + +static void +parse_options(int argc, char *argv[]) +{ + int c; + int errflag = 0; + extern char *optarg; + extern int optind, optopt; + execname = argv[0]; + + while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) { + switch (c) { + case 'b': + blocksize = atoi(optarg); + break; + + case 'e': + wr_err_expected = 1; + break; + + case 'h': + print_usage = 1; + break; + + case 'n': + numblocks = atoi(optarg); + break; + + case 'o': + outputfile = optarg; + break; + + case 'p': + randompattern = 1; + break; + + case ':': + (void) fprintf(stderr, + "Option -%c requires an opertand\n", + optopt); + errflag++; + break; + case '?': + default: + (void) fprintf(stderr, + "Unrecognized option: -%c\n", optopt); + errflag++; + break; + } + } + + if (errflag || print_usage == 1) + (void) usage(); + + if (blocksize < 512 || outputfile == NULL || numblocks <= 0) { + (void) fprintf(stderr, + "Required paramater(s) missing or invalid.\n"); + (void) usage(); + } +} + +/* + * Write blocksize * numblocks to the file using O_DIRECT. 
+ */ +static void * +write_thread(void *arg) +{ + size_t offset = 0; + int total_data = blocksize * numblocks; + int left = total_data; + ssize_t wrote = 0; + pthread_args_t *args = (pthread_args_t *)arg; + + while (!args->entire_file_written) { + wrote = pwrite(ofd, buf, blocksize, offset); + if (wrote != blocksize) { + if (wr_err_expected) + assert(errno == EIO); + else + exit(2); + } + + offset = ((offset + blocksize) % total_data); + left -= blocksize; + + if (left == 0) + args->entire_file_written = 1; + } + + pthread_exit(NULL); +} + +/* + * Update the buffers contents with random data. + */ +static void * +manipulate_buf_thread(void *arg) +{ + size_t rand_offset; + char rand_char; + pthread_args_t *args = (pthread_args_t *)arg; + + while (!args->entire_file_written) { + rand_offset = (rand() % blocksize); + rand_char = (rand() % (126 - 33) + 33); + buf[rand_offset] = rand_char; + } + + pthread_exit(NULL); +} + +int +main(int argc, char *argv[]) +{ + const char *datapattern = "0xdeadbeef"; + int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT; + mode_t mode = S_IRUSR | S_IWUSR; + pthread_t write_thr; + pthread_t manipul_thr; + int left = blocksize; + int offset = 0; + int rc; + pthread_args_t args = { 0 }; + + parse_options(argc, argv); + + ofd = open(outputfile, ofd_flags, mode); + if (ofd == -1) { + (void) fprintf(stderr, "%s, %s\n", execname, outputfile); + perror("open"); + exit(2); + } + + int err = posix_memalign((void **)&buf, sysconf(_SC_PAGE_SIZE), + blocksize); + if (err != 0) { + (void) fprintf(stderr, + "%s: %s\n", execname, strerror(err)); + exit(2); + } + + if (!randompattern) { + /* Putting known data pattern in buffer */ + while (left) { + size_t amt = MIN(strlen(datapattern), left); + memcpy(&buf[offset], datapattern, amt); + offset += amt; + left -= amt; + } + } else { + /* Putting random data in buffer */ + for (int i = 0; i < blocksize; i++) + buf[i] = rand(); + } + + /* + * Writing using O_DIRECT while manipulating the buffer contents until + * 
the entire file is written. + */ + if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread, + &args))) { + fprintf(stderr, "error: pthreads_create, manipul_thr, " + "rc: %d\n", rc); + exit(2); + } + + if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) { + fprintf(stderr, "error: pthreads_create, write_thr, " + "rc: %d\n", rc); + exit(2); + } + + pthread_join(write_thr, NULL); + pthread_join(manipul_thr, NULL); + + assert(args.entire_file_written == 1); + + (void) close(ofd); + + free(buf); + + return (0); +} diff --git a/tests/zfs-tests/cmd/stride_dd.c b/tests/zfs-tests/cmd/stride_dd.c index a20b26131650..e1e45794cf16 100644 --- a/tests/zfs-tests/cmd/stride_dd.c +++ b/tests/zfs-tests/cmd/stride_dd.c @@ -21,12 +21,19 @@ #include #include +static int alignment = 0; static int bsize = 0; static int count = 0; static char *ifile = NULL; static char *ofile = NULL; -static off_t stride = 0; +static off_t stride = 1; static off_t seek = 0; +static int seekbytes = 0; +static int if_o_direct = 0; +static int of_o_direct = 0; +static int skip = 0; +static int skipbytes = 0; +static int entire_file = 0; static const char *execname = "stride_dd"; static void usage(void); @@ -36,8 +43,10 @@ static void usage(void) { (void) fprintf(stderr, - "usage: %s -i inputfile -o outputfile -b blocksize -c count \n" - " -s stride [ -k seekblocks]\n" + "usage: %s -i inputfile -o outputfile -b blocksize [-c count]\n" + " [-s stride] [-k seekblocks] [-K seekbytes]\n" + " [-a alignment] [-d if_o_direct] [-D of_o_direct]\n" + " [-p skipblocks] [-P skipbytes] [-e entire_file]\n" "\n" "Simplified version of dd that supports the stride option.\n" "A stride of n means that for each block written, n - 1 blocks\n" @@ -45,16 +54,47 @@ usage(void) "means that blocks are read and written consecutively.\n" "All numeric parameters must be integers.\n" "\n" - " inputfile: File to read from\n" - " outputfile: File to write to\n" - " blocksize: Size of each block to read/write\n" - " 
count: Number of blocks to read/write\n" - " stride: Read/write a block then skip (stride - 1) blocks\n" - " seekblocks: Number of blocks to skip at start of output\n", + " inputfile: File to read from\n" + " outputfile: File to write to\n" + " blocksize: Size of each block to read/write\n" + " count: Number of blocks to read/write (Required" + " unless -e is used)\n" + " stride: Read/write a block then skip (stride - 1) blocks" + "\n" + " seekblocks: Number of blocks to skip at start of output\n" + " seekbytes: Treat seekblocks as byte count\n" + " alignment: Alignment passed to posix_memalign() (default" + " PAGE_SIZE)\n" + " if_o_direct: Use O_DIRECT with inputfile (default no O_DIRECT)" + "\n" + " of_o_direct: Use O_DIRECT with outputfile (default no " + " O_DIRECT)\n" + " skipblocks: Number of blocks to skip at start of input " + " (default 0)\n" + " skipbytes: Treat skipblocks as byte count\n" + " entire_file: When used the entire inputfile will be read and" + " count will be ignored\n", execname); (void) exit(1); } +/* + * posix_memalign() only allows for alignments which are positive, powers of two + * and a multiple of sizeof (void *).
+ */ +static int +invalid_alignment(int alignment) +{ + if ((alignment < 0) || (alignment & (alignment - 1)) || + ((alignment % sizeof (void *)))) { + (void) fprintf(stderr, + "Alignment must be a postive, power of two, and multiple " + "of sizeof (void *).\n"); + return (1); + } + return (0); +} + static void parse_options(int argc, char *argv[]) { @@ -62,12 +102,17 @@ parse_options(int argc, char *argv[]) int errflag = 0; execname = argv[0]; + alignment = sysconf(_SC_PAGE_SIZE); extern char *optarg; extern int optind, optopt; - while ((c = getopt(argc, argv, ":b:c:i:o:s:k:")) != -1) { + while ((c = getopt(argc, argv, "a:b:c:deDi:o:s:k:Kp:P")) != -1) { switch (c) { + case 'a': + alignment = atoi(optarg); + break; + case 'b': bsize = atoi(optarg); break; @@ -76,6 +121,18 @@ parse_options(int argc, char *argv[]) count = atoi(optarg); break; + case 'd': + if_o_direct = 1; + break; + + case 'e': + entire_file = 1; + break; + + case 'D': + of_o_direct = 1; + break; + case 'i': ifile = optarg; break; @@ -92,6 +149,18 @@ parse_options(int argc, char *argv[]) seek = atoi(optarg); break; + case 'K': + seekbytes = 1; + break; + + case 'p': + skip = atoi(optarg); + break; + + case 'P': + skipbytes = 1; + break; + case ':': (void) fprintf(stderr, "Option -%c requires an operand\n", optopt); @@ -111,64 +180,59 @@ parse_options(int argc, char *argv[]) } } - if (bsize <= 0 || count <= 0 || stride <= 0 || ifile == NULL || - ofile == NULL || seek < 0) { + if (bsize <= 0 || stride <= 0 || ifile == NULL || ofile == NULL || + seek < 0 || invalid_alignment(alignment) || skip < 0) { + (void) fprintf(stderr, + "Required parameter(s) missing or invalid.\n"); + (void) usage(); + } + + if (count <= 0 && entire_file == 0) { (void) fprintf(stderr, "Required parameter(s) missing or invalid.\n"); (void) usage(); } } -int -main(int argc, char *argv[]) +static void +read_entire_file(int ifd, int ofd, void *buf) { - int i; - int ifd; - int ofd; - void *buf; int c; - parse_options(argc, argv); - - 
ifd = open(ifile, O_RDONLY); - if (ifd == -1) { - (void) fprintf(stderr, "%s: %s: ", execname, ifile); - perror("open"); - exit(2); - } - - ofd = open(ofile, O_WRONLY | O_CREAT, 0666); - if (ofd == -1) { - (void) fprintf(stderr, "%s: %s: ", execname, ofile); - perror("open"); - exit(2); - } - - /* - * We use valloc because some character block devices expect a - * page-aligned buffer. - */ - int err = posix_memalign(&buf, 4096, bsize); - if (err != 0) { - (void) fprintf(stderr, - "%s: %s\n", execname, strerror(err)); - exit(2); - } - - if (seek > 0) { - if (lseek(ofd, seek * bsize, SEEK_CUR) == -1) { - perror("output lseek"); + do { + c = read(ifd, buf, bsize); + if (c < 0) { + perror("read"); exit(2); + } else if (c != 0) { + c = write(ofd, buf, bsize); + if (c < 0) { + perror("write"); + exit(2); + } + } - } + if (stride > 1) { + if (lseek(ifd, (stride - 1) * bsize, SEEK_CUR) == -1) { + perror("input lseek"); + exit(2); + } + if (lseek(ofd, (stride - 1) * bsize, SEEK_CUR) == -1) { + perror("output lseek"); + exit(2); + } + } + } while (c != 0); +} + +static void +read_on_count(int ifd, int ofd, void *buf) +{ + int i; + int c; for (i = 0; i < count; i++) { c = read(ifd, buf, bsize); - if (c != bsize) { - - perror("read"); - exit(2); - } if (c != bsize) { if (c < 0) { perror("read"); @@ -205,6 +269,71 @@ main(int argc, char *argv[]) } } } +} + +int +main(int argc, char *argv[]) +{ + int ifd; + int ofd; + int ifd_flags = O_RDONLY; + int ofd_flags = O_WRONLY | O_CREAT; + void *buf; + + parse_options(argc, argv); + + if (if_o_direct) + ifd_flags |= O_DIRECT; + + if (of_o_direct) + ofd_flags |= O_DIRECT; + + ifd = open(ifile, ifd_flags); + if (ifd == -1) { + (void) fprintf(stderr, "%s: %s: ", execname, ifile); + perror("open"); + exit(2); + } + + ofd = open(ofile, ofd_flags, 0666); + if (ofd == -1) { + (void) fprintf(stderr, "%s: %s: ", execname, ofile); + perror("open"); + exit(2); + } + + /* + * We use valloc because some character block devices expect a + * 
page-aligned buffer. + */ + int err = posix_memalign(&buf, alignment, bsize); + if (err != 0) { + (void) fprintf(stderr, + "%s: %s\n", execname, strerror(err)); + exit(2); + } + + if (skip > 0) { + int skipamt = skipbytes == 1 ? skip : skip * bsize; + if (lseek(ifd, skipamt, SEEK_CUR) == -1) { + perror("input lseek"); + exit(2); + } + } + + if (seek > 0) { + int seekamt = seekbytes == 1 ? seek : seek * bsize; + if (lseek(ofd, seekamt, SEEK_CUR) == -1) { + perror("output lseek"); + exit(2); + } + } + + if (entire_file == 1) + read_entire_file(ifd, ofd, buf); + else + read_on_count(ifd, ofd, buf); + free(buf); (void) close(ofd); diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 19770138bf14..934aca6f918b 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -200,6 +200,7 @@ export ZFSTEST_FILES='badsend getversion largest_file libzfs_input_check + manipulate_user_buffer mkbusy mkfile mkfiles diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index a2f42999a31e..1c467ca65dcc 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3474,6 +3474,18 @@ function md5digest esac } +# +# Compare the MD5 digest of two files. +# +function cmp_md5s { + typeset file1=$1 + typeset file2=$2 + + typeset sum1=$(md5digest $file1) + typeset sum2=$(md5digest $file2) + test "$sum1" = "$sum2" +} + # # Compute SHA256 digest for given file or stdin if no file given. 
# Note: file path must not contain spaces diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index 96943421f84c..9f436eb4026e 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -93,6 +93,7 @@ VDEV_FILE_LOGICAL_ASHIFT vdev.file.logical_ashift vdev_file_logical_ashift VDEV_FILE_PHYSICAL_ASHIFT vdev.file.physical_ashift vdev_file_physical_ashift VDEV_MAX_AUTO_ASHIFT vdev.max_auto_ashift zfs_vdev_max_auto_ashift VDEV_MIN_MS_COUNT vdev.min_ms_count zfs_vdev_min_ms_count +VDEV_DIRECT_WR_VERIFY vdev.direct_write_verify zfs_vdev_direct_write_verify VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode @@ -100,6 +101,7 @@ VOL_RECURSIVE vol.recursive UNSUPPORTED VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq BCLONE_ENABLED bclone_enabled zfs_bclone_enabled BCLONE_WAIT_DIRTY bclone_wait_dirty zfs_bclone_wait_dirty +DIO_ENABLED dio_enabled zfs_dio_enabled XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index bbeabc6dfb42..053a2c09f649 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -265,6 +265,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/delegate/delegate_common.kshlib \ functional/devices/devices.cfg \ functional/devices/devices_common.kshlib \ + functional/direct/dio.cfg \ + functional/direct/dio.kshlib \ functional/events/events.cfg \ functional/events/events_common.kshlib \ functional/fault/fault.cfg \ @@ -1458,6 +1460,26 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/devices/devices_002_neg.ksh \ functional/devices/devices_003_pos.ksh \ functional/devices/setup.ksh \ + functional/direct/dio_aligned_block.ksh \ + functional/direct/dio_async_always.ksh \ + 
functional/direct/dio_async_fio_ioengines.ksh \ + functional/direct/dio_compression.ksh \ + functional/direct/dio_dedup.ksh \ + functional/direct/dio_encryption.ksh \ + functional/direct/dio_grow_block.ksh \ + functional/direct/dio_max_recordsize.ksh \ + functional/direct/dio_mixed.ksh \ + functional/direct/dio_mmap.ksh \ + functional/direct/dio_overwrites.ksh \ + functional/direct/dio_property.ksh \ + functional/direct/dio_random.ksh \ + functional/direct/dio_recordsize.ksh \ + functional/direct/dio_unaligned_block.ksh \ + functional/direct/dio_unaligned_filesize.ksh \ + functional/direct/dio_write_verify.ksh \ + functional/direct/dio_write_stable_pages.ksh \ + functional/direct/setup.ksh \ + functional/direct/cleanup.ksh \ functional/dos_attributes/cleanup.ksh \ functional/dos_attributes/read_dos_attrs_001.ksh \ functional/dos_attributes/setup.ksh \ diff --git a/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh b/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh index 945db71bf113..20498440bea7 100755 --- a/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh +++ b/tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh @@ -75,7 +75,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) )) log_must set_tunable32 L2ARC_WRITE_MAX $(( $VCACHE_SZ * 2 )) diff --git a/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh b/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh index 57f6b6a0242b..1d3cbfc79ee6 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_compressed_arc.ksh @@ -36,7 +36,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git 
a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh index f7b8a4b950d5..460c95bb6051 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted.ksh @@ -37,7 +37,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh index 0838b2c93e68..2f352e2af5d4 100755 --- a/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh +++ b/tests/zfs-tests/tests/functional/compression/l2arc_encrypted_no_compressed_arc.ksh @@ -37,7 +37,7 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # # DESCRIPTION: diff --git a/tests/zfs-tests/tests/functional/direct/cleanup.ksh b/tests/zfs-tests/tests/functional/direct/cleanup.ksh new file mode 100755 index 000000000000..75fe97f923d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/cleanup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_cleanup_noexit + +if tunable_exists DIO_ENABLED ; then + log_must restore_tunable DIO_ENABLED +fi + +log_pass diff --git a/tests/zfs-tests/tests/functional/direct/dio.cfg b/tests/zfs-tests/tests/functional/direct/dio.cfg new file mode 100644 index 000000000000..6472610d7b41 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio.cfg @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +DIO_VDEV1=$TEST_BASE_DIR/file1 +DIO_VDEV2=$TEST_BASE_DIR/file2 +DIO_VDEV3=$TEST_BASE_DIR/file3 +DIO_VDEVS="$DIO_VDEV1 $DIO_VDEV2 $DIO_VDEV3" + +DIO_FILESIZE=4M +DIO_BS=128K diff --git a/tests/zfs-tests/tests/functional/direct/dio.kshlib b/tests/zfs-tests/tests/functional/direct/dio.kshlib new file mode 100644 index 000000000000..3a70cf293967 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio.kshlib @@ -0,0 +1,331 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg + +function dio_cleanup +{ + if poolexists $TESTPOOL1; then + destroy_pool $TESTPOOL1 + fi + + rm -f $DIO_VDEVS +} + +# +# Generate an IO workload using fio and then verify the resulting data. +# +function dio_and_verify # mode file-size block-size directory ioengine extra-args +{ + typeset mode=$1 + typeset size=$2 + typeset bs=$3 + typeset mntpnt=$4 + typeset ioengine=$5 + typeset extra_args=$6 + + # Invoke an fio workload via Direct I/O and verify with Direct I/O. + log_must fio --directory=$mntpnt --name=direct-$mode \ + --rw=$mode --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --verify=sha1 --ioengine=$ioengine --fallocate=none \ + --group_reporting --minimal --do_verify=1 $extra_args + + # Now just read back the file without Direct I/O into the ARC as an + # additional verification step. 
+ log_must fio --directory=$mntpnt --name=direct-$mode \ + --rw=read --size=$size --bs=$bs --direct=0 --numjobs=1 \ + --ioengine=$ioengine --group_reporting --minimal + + log_must rm -f "$mntpnt/direct-*" +} + +# +# Get zpool status -d checksum verify failures +# +function get_zpool_status_chksum_verify_failures # pool_name vdev_type +{ + typeset pool=$1 + typeset vdev_type=$2 + + if [[ "$vdev_type" == "stripe" ]]; then + val=$(zpool status -dp $pool | \ + awk '{s+=$6} END {print s}' ) + elif [[ "$vdev_type" == "mirror" || "$vdev_type" == "raidz" || + "$vdev_type" == "draid" ]]; then + val=$(zpool status -dp $pool | \ + awk -v d="$vdev_type" '$0 ~ d {print $6}' ) + else + log_fail "Unsupported VDEV type in \ + get_zpool_status_chksum_verify_failures(): $vdev_type" + fi + echo "$val" +} + +# +# Get ZED dio_verify events +# +function get_zed_dio_verify_events # pool +{ + typeset pool=$1 + + val=$(zpool events $pool | grep -c dio_verify) + + echo "$val" +} + +# +# Checking for checksum verify write failures with: +# zpool status -d +# zpool events +# After getting the counts, clears out the ZPool errors and events +# +function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors +{ + typeset pool=$1 + typeset vdev_type=$2 + typeset expect_errors=$3 + typeset note_str="expecting none" + + if [[ $expect_errors -ne 0 ]]; then + note_str="expecting some" + fi + + log_note "Checking for Direct I/O write checksum verify errors \ + $note_str on ZPool: $pool" + + status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type) + zed_dio_verify_events=$(get_zed_dio_verify_events $pool) + + if [[ $expect_errors -ne 0 ]]; then + if [[ $status_failures -eq 0 || + $zed_dio_verify_events -eq 0 ]]; then + zpool status -dp $pool + zpool events $pool + log_fail "Checksum verifies in zpool status -d \ + $status_failures. ZED dio_verify events \ + $zed_dio_verify_events. Neither should be 0." 
+ fi + else + if [[ $status_failures -ne 0 || + $zed_dio_verify_events -ne 0 ]]; then + zpool status -dp $pool + zpool events $pool + log_fail "Checksum verifies in zpool status -d \ + $status_failures. ZED dio_verify events \ + $zed_dio_verify_events. Both should be zero." + fi + fi + + log_must zpool clear $pool + log_must zpool events -c + +} + +# +# Get the value of a counter from +# Linux: /proc/spl/kstat/zfs/$pool/iostats file. +# FreeBSD: kstat.zfs.$pool.misc.iostats.$stat +# +function get_iostats_stat # pool stat +{ + typeset pool=$1 + typeset stat=$2 + + if is_linux; then + iostats_file=/proc/spl/kstat/zfs/$pool/iostats + val=$(grep -m1 "$stat" $iostats_file | awk '{ print $3 }') + else + val=$(sysctl -n kstat.zfs.$pool.misc.iostats.$stat) + fi + if [[ -z "$val" ]]; then + log_fail "Unable to read $stat counter" + fi + + echo "$val" +} + +# +# Evict any buffered blocks by overwriting them using an O_DIRECT request. +# +function evict_blocks +{ + typeset pool=$1 + typeset file=$2 + typeset size=$3 + + log_must stride_dd -i /dev/urandom -o $file -b $size -c 1 -D +} + +# +# Perform FIO Direct I/O writes to a file with the given arguments. +# Then verify the minimum expected number of blocks were written as +# Direct I/O. 
+# +function verify_dio_write_count #pool bs size mnpnt +{ + typeset pool=$1 + typeset bs=$2 + typeset size=$3 + typeset mntpnt=$4 + typeset dio_wr_expected=$(((size / bs) -1)) + + log_note "Checking for $dio_wr_expected Direct I/O writes" + + prev_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_and_verify write $size $bs $mntpnt "sync" + curr_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_wr_actual=$((curr_dio_wr - prev_dio_wr)) + + if [[ $dio_wr_actual -lt $dio_wr_expected ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct writes $dio_wr_actual of $dio_wr_expected" + fi +} + +# +# Perform a stride_dd write command to the file with the given arguments. +# Then verify the minimum expected number of blocks were written as either +# buffered IO (by the ARC), or Direct I/O to the application (dd). +# +function check_write # pool file bs count seek flags buf_wr dio_wr +{ + typeset pool=$1 + typeset file=$2 + typeset bs=$3 + typeset count=$4 + typeset seek=$5 + typeset flags=$6 + typeset buf_wr_expect=$7 + typeset dio_wr_expect=$8 + + log_note "Checking $count * $bs write(s) at offset $seek, $flags" + + prev_buf_wr=$(get_iostats_stat $pool arc_write_count) + prev_dio_wr=$(get_iostats_stat $pool direct_write_count) + + log_must stride_dd -i /dev/urandom -o $file -b $bs -c $count \ + -k $seek $flags + + curr_buf_wr=$(get_iostats_stat $pool arc_write_count) + buf_wr_actual=$((curr_buf_wr - prev_buf_wr)) + + curr_dio_wr=$(get_iostats_stat $pool direct_write_count) + dio_wr_actual=$((curr_dio_wr - prev_dio_wr)) + + if [[ $buf_wr_actual -lt $buf_wr_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Buffered writes $buf_wr_actual of $buf_wr_expect" + fi + + if [[ $dio_wr_actual -lt $dio_wr_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl 
kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct writes $dio_wr_actual of $dio_wr_expect" + fi +} + +# +# Perform a stride_dd read command to the file with the given arguments. +# Then verify the minimum expected number of blocks were read as either +# buffered IO (by the ARC), or Direct I/O to the application (dd). +# +function check_read # pool file bs count skip flags buf_rd dio_rd +{ + typeset pool=$1 + typeset file=$2 + typeset bs=$3 + typeset count=$4 + typeset skip=$5 + typeset flags=$6 + typeset buf_rd_expect=$7 + typeset dio_rd_expect=$8 + + log_note "Checking $count * $bs read(s) at offset $skip, $flags" + + prev_buf_rd=$(get_iostats_stat $pool arc_read_count) + prev_dio_rd=$(get_iostats_stat $pool direct_read_count) + + log_must stride_dd -i $file -o /dev/null -b $bs -c $count \ + -p $skip $flags + + curr_buf_rd=$(get_iostats_stat $pool arc_read_count) + buf_rd_actual=$((curr_buf_rd - prev_buf_rd)) + + curr_dio_rd=$(get_iostats_stat $pool direct_read_count) + dio_rd_actual=$((curr_dio_rd - prev_dio_rd)) + + if [[ $buf_rd_actual -lt $buf_rd_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Buffered reads $buf_rd_actual of $buf_rd_expect" + fi + + if [[ $dio_rd_actual -lt $dio_rd_expect ]]; then + if is_linux; then + cat /proc/spl/kstat/zfs/$pool/iostats + else + sysctl kstat.zfs.$pool.misc.iostats + fi + log_fail "Direct reads $dio_rd_actual of $dio_rd_expect" + fi +} + +function get_file_size +{ + typeset filename="$1" + + if is_linux; then + filesize=$(stat -c %s $filename) + else + filesize=$(stat -s $filename | awk '{print $8}' | grep -o '[0-9]\+') + fi + + echo $filesize +} + +function do_truncate_reduce +{ + typeset filename=$1 + typeset size=$2 + + filesize=$(get_file_size $filename) + eval "echo original filesize: $filesize" + if is_linux; then + truncate $filename -s $((filesize - size)) + else + truncate -s -$size $filename + fi + filesize=$(get_file_size 
$filename) + eval "echo new filesize after truncate: $filesize" +} diff --git a/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh new file mode 100755 index 000000000000..e26fbdfc2569 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_aligned_block.ksh @@ -0,0 +1,115 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the number direct/buffered requests for (un)aligned access +# +# STRATEGY: +# 1. Create a multi-block file +# 2. Perform various (un)aligned accesses and verify the result. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_onexit cleanup + +log_assert "Verify the number direct/buffered requests for unaligned access" + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +tmp_file=$mntpnt/tmp_file +file_size=$((rs * 8)) + +log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1 + +# N recordsize aligned writes which do not span blocks +check_write $TESTPOOL $tmp_file $rs 1 0 "-D" 0 1 +check_write $TESTPOOL $tmp_file $rs 2 0 "-D" 0 2 +check_write $TESTPOOL $tmp_file $rs 4 0 "-D" 0 4 +check_write $TESTPOOL $tmp_file $rs 8 0 "-D" 0 8 + +# 1 recordsize aligned write which spans multiple blocks at various offsets +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 0 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 1 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 2 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 3 "-D" 0 2 +check_write $TESTPOOL $tmp_file $((rs * 4)) 1 0 "-D" 0 4 +check_write $TESTPOOL $tmp_file $((rs * 4)) 1 1 "-D" 0 4 +check_write $TESTPOOL $tmp_file $((rs * 8)) 1 0 "-D" 0 8 + +# sub-blocksize unaligned writes which do not span blocks. 
+check_write $TESTPOOL $tmp_file $((rs / 2)) 1 0 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 1 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 2 "-D" 1 0 +check_write $TESTPOOL $tmp_file $((rs / 2)) 1 3 "-D" 1 0 + +# large unaligned writes which span multiple blocks +check_write $TESTPOOL $tmp_file $((rs * 2)) 1 $((rs / 2)) "-D -K" 2 1 +check_write $TESTPOOL $tmp_file $((rs * 4)) 2 $((rs / 4)) "-D -K" 4 6 + +# evict any cached blocks by overwriting with O_DIRECT +evict_blocks $TESTPOOL $tmp_file $file_size + +# recordsize aligned reads which do not span blocks +check_read $TESTPOOL $tmp_file $rs 1 0 "-d" 0 1 +check_read $TESTPOOL $tmp_file $rs 2 0 "-d" 0 2 +check_read $TESTPOOL $tmp_file $rs 4 0 "-d" 0 4 +check_read $TESTPOOL $tmp_file $rs 8 0 "-d" 0 8 + +# 1 recordsize aligned read which spans multiple blocks at various offsets +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 0 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 1 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 2 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 3 "-d" 0 2 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 0 "-d" 0 4 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 1 "-d" 0 4 +check_read $TESTPOOL $tmp_file $((rs * 8)) 1 0 "-d" 0 8 + +# sub-blocksize unaligned reads which do not span blocks. 
+check_read $TESTPOOL $tmp_file $((rs / 2)) 1 0 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 1 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 2 "-d" 0 1 +check_read $TESTPOOL $tmp_file $((rs / 2)) 1 3 "-d" 0 1 + +# large unaligned reads which span multiple blocks +check_read $TESTPOOL $tmp_file $((rs * 2)) 1 $((rs / 2)) "-d -P" 0 3 +check_read $TESTPOOL $tmp_file $((rs * 4)) 1 $((rs / 4)) "-d -P" 0 5 + +log_pass "Verify the number direct/buffered requests for (un)aligned access" diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh new file mode 100755 index 000000000000..27fd66ccd216 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_async_always.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify small async Direct I/O requests +# +# STRATEGY: +# 1. Use fio to issue small read/write requests. 
Writes are +# smaller than the block size and thus will be buffered, +# reads satisfy the minimum alignment and will be direct. +# + +verify_runnable "global" + +function cleanup +{ + zfs set direct=standard $TESTPOOL/$TESTFS + rm $tmp_file +} + +log_assert "Verify direct=always mixed small async requests" + +log_onexit cleanup + +log_must zfs set direct=always $TESTPOOL/$TESTFS + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1G +runtime=10 + +log_must truncate -s $file_size $tmp_file + +log_must fio --filename=$tmp_file --name=always-randrw \ + --rw=randwrite --bs=$page_size --size=$file_size --numjobs=1 \ + --ioengine=posixaio --fallocate=none --iodepth=4 --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based + +log_pass "Verify direct=always mixed small async requests" diff --git a/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh new file mode 100755 index 000000000000..5492a5a90584 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_async_fio_ioengines.ksh @@ -0,0 +1,106 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify FIO async engines work using Direct I/O. +# +# STRATEGY: +# 1. Select a FIO async ioengine +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt/direct-*" +} + +function check_fio_ioengine +{ + fio --ioengine=io_uring --parse-only > /dev/null 2>&1 + return $? +} + +log_assert "Verify FIO async ioengines work using Direct I/O." + +log_onexit cleanup + +typeset -a async_ioengine_args=("--iodepth=4" "--iodepth=4 --thread") + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +fio_async_ioengines="posixaio" + +if is_linux; then + fio_async_ioengines+=" libaio" + if $(grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r)); then + if [ -e /etc/os-release ] ; then + source /etc/os-release + if [ $PLATFORM_ID = "platform:el9" ] ; then + log_note "io_uring disabled on RHEL 9 " \ + "variants: fails with " \ + "'Operation not permitted'" + elif $(check_fio_ioengine -eq 0); then + fio_async_ioengines+=" io_uring" + else + log_note "io_uring not supported by fio and " \ + "will not be tested" + fi + else + if $(check_fio_ioengine); then + fio_async_ioengines+=" io_uring" + + else + log_note "io_uring not supported by fio and " \ + "will not be tested" + fi + fi + else + log_note "io_uring not supported by kernel will not " \ + "be tested" + + fi +fi + +for ioengine in $fio_async_ioengines; do + for ioengine_args in "${async_ioengine_args[@]}"; do + 
+ for op in "rw" "randrw" "write"; do + log_note "Checking Direct I/O with FIO async ioengine" \ + " $ioengine with args $ioengine_args --rw=$op" + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "$ioengine" \ + "$ioengine_args" + done + done +done + +log_pass "Verfied FIO async ioengines work using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_compression.ksh b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh new file mode 100755 index 000000000000..5463715d7bab --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_compression.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify compression works using Direct I/O. +# +# STRATEGY: +# 1. Select a random compression algorithm +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# 4. 
Repeat from 2 for all compression algorithms +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt/direct-*" + log_must zfs set compression=off $TESTPOOL/$TESTFS +} + +log_assert "Verify compression works using Direct I/O." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +compress_args="--buffer_compress_percentage=50" + +for comp in "${compress_prop_vals[@]:1}"; do + log_must zfs set compression=$comp $TESTPOOL/$TESTFS + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "sync" $compress_args + done +done + +log_pass "Verfied compression works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh new file mode 100755 index 000000000000..ba2b29eeca4e --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_dedup.ksh @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. 
$STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify deduplication works. Deduplication is disabled when issuing +# Direct I/O writes. +# +# STRATEGY: +# 1. Enable dedup +# 2. Start sequential Direct I/O and verify with buffered I/O +# 3. Start mixed Direct I/O and verify with buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt/direct-*" + log_must zfs set dedup=off $TESTPOOL/$TESTFS +} + +log_assert "Verify deduplication works using Direct I/O." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +dedup_args="--dedupe_percentage=50" + +log_must zfs set dedup=on $TESTPOOL/$TESTFS +for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $DIO_BS $mntpnt "sync" $dedup_args +done + +log_pass "Verfied deduplication works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh new file mode 100755 index 000000000000..b6faa11970b3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_encryption.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify encryption works using Direct I/O. +# +# STRATEGY: +# 1. Create multidisk pool. +# 2. Start some mixed readwrite Direct I/O. +# 3. Verify the results are as expected using buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify encryption works using Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +create_pool $TESTPOOL1 $DIO_VDEVS +log_must eval "echo 'password' | zfs create -o encryption=on \ + -o keyformat=passphrase -o keylocation=prompt -o compression=off \ + $TESTPOOL1/$TESTFS1" + +mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + +for bs in "4k" "128k" "1m"; do + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $bs $mntpnt "sync" + done +done + +log_pass "Verified encryption works using Direct I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh new file mode 100755 index 000000000000..12b2f2127535 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_grow_block.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the number of direct/buffered requests when growing a file +# +# STRATEGY: +# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_assert "Verify the number direct/buffered requests when growing a file" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +tmp_file=$mntpnt/tmp_file + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +# +# Verify the expected number of buffered and Direct I/O's when growing +# the first block of a file up to the maximum recordsize. +# +for bs in "8192" "16384" "32768" "65536" "131072"; do + + # When O_DIRECT is set the first write to a new file, or when the + # block size needs to be grown, it will be done as a buffered write. + check_write $TESTPOOL $tmp_file $bs 1 0 "-D" 1 0 + + # Overwriting the first block of an existing file with O_DIRECT will + # be a buffered write if less than the block size. + check_write $TESTPOOL $tmp_file 4096 1 0 "-D" 1 0 + check_write $TESTPOOL $tmp_file 4096 1 1 "-D" 1 0 + + # Overwriting the first block of an existing file with O_DIRECT will + # be a direct write as long as the block size matches. 
+ check_write $TESTPOOL $tmp_file $bs 1 0 "-D" 0 1 + + # Evict any blocks which may be buffered before the read tests. + evict_blocks $TESTPOOL $tmp_file $bs + + # Reading the first block of an existing file with O_DIRECT will + # be a direct read for part or all of the block size. + check_read $TESTPOOL $tmp_file $bs 1 0 "-d" 0 1 + check_read $TESTPOOL $tmp_file 4096 1 0 "-d" 0 1 + check_read $TESTPOOL $tmp_file 4096 1 1 "-d" 0 1 +done + +log_pass "Verify the number direct/buffered requests when growing a file" diff --git a/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh new file mode 100755 index 000000000000..2c0ce832b1fe --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_max_recordsize.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify max recordsizes are supported for Direct I/O. +# +# STRATEGY: +# 1. 
Create a pool from each vdev type with varying recordsizes. +# 2. Start sequential Direct I/O and verify with buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify max recordsizes are supported for Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +for type in "" "mirror" "raidz" "draid"; do + for recsize in "2097152" "8388608" "16777216"; do + create_pool $TESTPOOL1 $type $DIO_VDEVS + log_must eval "zfs create \ + -o recordsize=$recsize -o compression=off \ + $TESTPOOL1/$TESTFS1" + + mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + + verify_dio_write_count $TESTPOOL1 $recsize $((4 * recsize)) \ + $mntpnt + + destroy_pool $TESTPOOL1 + done +done + +log_pass "Verified max recordsizes are supported for Direct I/O." diff --git a/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh new file mode 100755 index 000000000000..6f217d91d548 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_mixed.ksh @@ -0,0 +1,107 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/include/properties.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed buffered and Direct I/O are coherent. +# +# STRATEGY: +# 1. Verify interleaved buffered and Direct I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f $src_file $new_file $tmp_file +} + +log_assert "Verify mixed buffered and Direct I/O are coherent." + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +src_file=$mntpnt/src_file +new_file=$mntpnt/new_file +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1048576 + +log_must stride_dd -i /dev/urandom -o $src_file -b $file_size -c 1 + +# +# Using mixed input and output block sizes verify that buffered and +# Direct I/O can be interleaved and the result will always be coherent. +# +for ibs in "512" "$page_size" "131072"; do + for obs in "512" "$page_size" "131072"; do + iblocks=$(($file_size / $ibs)) + oblocks=$(($file_size / $obs)) + iflags="" + oflags="" + + # Only allow Direct I/O when it is at least page sized. + if [[ $ibs -ge $page_size ]]; then + iflags="-d" + fi + + if [[ $obs -ge $page_size ]]; then + oflags="-D" + fi + + # Verify buffered write followed by a direct read. + log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks $iflags + log_must cmp_md5s $new_file $tmp_file + log_must rm -f $new_file $tmp_file + + # Verify direct write followed by a buffered read. + log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks $oflags + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks + log_must cmp_md5s $new_file $tmp_file + log_must rm -f $new_file $tmp_file + + # Verify direct write followed by a direct read. 
+ log_must stride_dd -i $src_file -o $new_file -b $obs \ + -c $oblocks $oflags + log_must stride_dd -i $new_file -o $tmp_file -b $ibs \ + -c $iblocks $iflags + log_must cmp_md5s $new_file $tmp_file + log_must rm -f $new_file $tmp_file + done +done + +log_pass "Verify mixed buffered and Direct I/O are coherent." diff --git a/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh new file mode 100755 index 000000000000..fbd6afd7b391 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_mmap.ksh @@ -0,0 +1,92 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed Direct I/O and mmap I/O. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a background Direct I/O random read/write fio to the +# file. +# 3. Start a background mmap random read/write fio to the file. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f "$tmp_file" +} + +log_assert "Verify mixed Direct I/O and mmap I/O" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((128 * 1024)) +blocks=64 +size=$((bs * blocks)) +runtime=60 + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct I/O writes +log_must eval "fio --filename=$tmp_file --name=direct-write \ + --rw=randwrite --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# Direct I/O reads +log_must eval "fio --filename=$tmp_file --name=direct-read \ + --rw=randread --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# mmap I/O writes +log_must eval "fio --filename=$tmp_file --name=mmap-write \ + --rw=randwrite --size=$size --bs=$bs --numjobs=1 \ + --ioengine=mmap --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +# mmap I/O reads +log_must eval "fio --filename=$tmp_file --name=mmap-read \ + --rw=randread --size=$size --bs=$bs --numjobs=1 \ + --ioengine=mmap --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap &" + +wait + +log_pass "Verified mixed Direct I/O and mmap I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh new file mode 100755 index 000000000000..04973fc88632 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_overwrites.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common 
Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify Direct I/O overwrite. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a Direct I/O random write fio to the file. 
+# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + log_must rm -f "$tmp_file" +} + +log_assert "Verify Direct I/O overwrites" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((128 * 1024)) +blocks=64 +size=$((bs * blocks)) +runtime=60 + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct I/O overwrites +log_must eval "fio --filename=$tmp_file --name=direct-write \ + --rw=randwrite --size=$size --bs=$bs --direct=1 --numjobs=1 \ + --ioengine=sync --fallocate=none --group_reporting --minimal \ + --runtime=$runtime --time_based --norandommap" + +log_pass "Verified Direct I/O overwrites" diff --git a/tests/zfs-tests/tests/functional/direct/dio_property.ksh b/tests/zfs-tests/tests/functional/direct/dio_property.ksh new file mode 100755 index 000000000000..9e18f0bf787e --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_property.ksh @@ -0,0 +1,127 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. 
+# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify the direct=always|disabled|standard property +# +# STRATEGY: +# 1. Verify direct=always behavior +# 2. Verify direct=disabled behavior +# 3. Verify direct=standard behavior +# + +verify_runnable "global" + +function cleanup +{ + zfs set direct=standard $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_assert "Verify the direct=always|disabled|standard property" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) + +tmp_file=$mntpnt/tmp_file +page_size=$(getconf PAGESIZE) +file_size=1048576 +count=8 + +# +# Check when "direct=always" any aligned IO is done as direct. +# Note that the "-D" and "-d" flags are not set in the following calls to +# stride_dd. +# +log_must zfs set direct=always $TESTPOOL/$TESTFS + +log_note "Aligned writes (buffered, then all direct)" +check_write $TESTPOOL $tmp_file $rs $count 0 "" 1 $((count - 1)) + +log_note "Aligned overwrites" +check_write $TESTPOOL $tmp_file $rs $count 0 "" 0 $count + +log_note "Sub-recordsize unaligned overwrites" +check_write $TESTPOOL $tmp_file $((rs / 2)) $((2 * count)) 0 "" $((2 * count)) 0 + +log_note "Sub-page size aligned overwrites" +check_write $TESTPOOL $tmp_file 512 $count 0 "" $count 0 +evict_blocks $TESTPOOL $tmp_file $file_size + +log_note "Aligned reads" +check_read $TESTPOOL $tmp_file $rs $count 0 "" 0 $count + +log_note "Sub-recordsize unaligned reads" +check_read $TESTPOOL $tmp_file $((rs / 2)) $((count * 2)) 0 "" 0 $((2 * count)) + +log_note "Sub-page size aligned reads (one read then ARC hits)" +check_read $TESTPOOL $tmp_file 512 $count 0 "" 1 0 + +log_must rm -f $tmp_file + + +# +# Check when "direct=disabled" there are never any direct requests. +# Note that the "-D" and "-d" flags are always set in the following calls to +# stride_dd. 
+# +log_must zfs set direct=disabled $TESTPOOL/$TESTFS + +log_note "Aligned writes (all buffered with an extra for create)" +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" $count 0 + +log_note "Aligned overwrites" +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" $count 0 + +log_note "Aligned reads (all ARC hits)" +check_read $TESTPOOL $tmp_file $rs $count 0 "-d" 0 0 + +log_must rm -f $tmp_file + + +# +# Check when "direct=standard" only requested Direct I/O occur. +# +log_must zfs set direct=standard $TESTPOOL/$TESTFS + +log_note "Aligned writes/overwrites (buffered / direct)" +check_write $TESTPOOL $tmp_file $rs $count 0 "" $count 0 +check_write $TESTPOOL $tmp_file $rs $count 0 "-D" 0 $count + +log_note "Aligned reads (buffered / direct)" +evict_blocks $TESTPOOL $tmp_file $file_size +check_read $TESTPOOL $tmp_file $rs $count 0 "" $count 0 +evict_blocks $TESTPOOL $tmp_file $file_size +check_read $TESTPOOL $tmp_file $rs $count 0 "-d" 0 $count + +log_pass "Verify the direct=always|disabled|standard property" diff --git a/tests/zfs-tests/tests/functional/direct/dio_random.ksh b/tests/zfs-tests/tests/functional/direct/dio_random.ksh new file mode 100755 index 000000000000..abe8d5c0dca1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_random.ksh @@ -0,0 +1,82 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify mixed Direct I/O and buffered I/O. A workload of random +# but correctly aligned direct read/writes is mixed with a +# concurrent workload of entirely unaligned buffered read/writes. +# +# STRATEGY: +# 1. Create an empty file. +# 2. Start a background fio randomly issuing direct read/writes. +# 3. Start a background fio randomly issuing buffered read/writes. +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$tmp_file" +} + +log_assert "Verify randomly sized mixed Direct I/O and buffered I/O" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +tmp_file=$mntpnt/file +bs=$((1024 * 1024)) +blocks=32 +size=$((bs * blocks)) +runtime=10 +page_size=$(getconf PAGESIZE) + +log_must stride_dd -i /dev/zero -o $tmp_file -b $bs -c $blocks + +# Direct random read/write page-aligned IO of varying sizes with +# occasional calls to fsync(2), mixed with... +log_must eval "fio --filename=$tmp_file --name=direct-rwrand \ + --rw=randrw --size=$size --offset_align=$(getconf PAGESIZE) \ + --bsrange=$page_size-1m --direct=1 --fsync=32 --numjobs=2 \ + --ioengine=sync --fallocate=none --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based &" + +# Buffered random read/write entirely unaligned IO of varying sizes +# occasional calls to fsync(2). 
+log_must eval "fio --filename=$tmp_file --name=buffered-write \ + --rw=randrw --size=$size --offset_align=512 --bs_unaligned=1 \ + --bsrange=$page_size-1m --direct=0 --fsync=32 --numjobs=2 \ + --ioengine=sync --fallocate=none --verify=sha1 \ + --group_reporting --minimal --runtime=$runtime --time_based &" + +wait + +log_pass "Verified randomly sized mixed Direct I/O and buffered I/O" diff --git a/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh new file mode 100755 index 000000000000..def46822130d --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_recordsize.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify different recordsizes are supported for Direct I/O. +# +# STRATEGY: +# 1. Create a pool from each vdev type with varying recordsizes. +# 2. Start sequential Direct I/O and verify with buffered I/O. +# 3. 
Start mixed Direct I/O and verify with buffered I/O. +# + +verify_runnable "global" + +log_assert "Verify different recordsizes are supported for Direct I/O." + +log_onexit dio_cleanup + +log_must truncate -s $MINVDEVSIZE $DIO_VDEVS + +for type in "" "mirror" "raidz" "draid"; do + for recsize in "1024" "4096" "128k"; do + create_pool $TESTPOOL1 $type $DIO_VDEVS + log_must eval "zfs create \ + -o recordsize=$recsize -o compression=off \ + $TESTPOOL1/$TESTFS1" + + mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1) + + for bs in "4k" "128k"; do + for op in "rw" "randrw" "write"; do + dio_and_verify $op $DIO_FILESIZE $bs $mntpnt "sync" + done + done + + destroy_pool $TESTPOOL1 + done +done + +log_pass "Verified different recordsizes are supported for Direct I/O." diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh new file mode 100755 index 000000000000..309d35ea0e6d --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_block.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. 
$STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify failure for (un)aligned O_DIRECT +# +# STRATEGY: +# 1. Create a multi-block file +# 2. Perform (un)aligned write/read verify the result. +# + +verify_runnable "global" + +function cleanup +{ + zfs set recordsize=$rs $TESTPOOL/$TESTFS + zfs set direct=standard $TESTPOOL/$TESTFS + log_must rm -f $tmp_file +} + +log_onexit cleanup + +log_assert "Verify direct requests for (un)aligned access" + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +tmp_file=$mntpnt/tmp_file +file_size=$((rs * 8)) + +log_must stride_dd -i /dev/urandom -o $tmp_file -b $file_size -c 1 + +log_must zfs set direct=standard $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always fail if direct=standard. +log_mustnot stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D +log_mustnot stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 -d + +log_must zfs set direct=always $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always pass if direct=always. +log_must stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 +log_must stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 + +log_must zfs set direct=disabled $TESTPOOL/$TESTFS +# sub-pagesize direct writes/read will always pass if direct=disabled. 
+log_must stride_dd -i /dev/urandom -o $tmp_file -b 512 -c 8 -D +log_must stride_dd -i $tmp_file -o /dev/null -b 512 -c 8 -d + +log_pass "Verify direct requests for (un)aligned access" diff --git a/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh new file mode 100755 index 000000000000..8bb363f1a983 --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_unaligned_filesize.ksh @@ -0,0 +1,91 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify Direct I/O reads can read an entire file that is not +# page-aligned in length. When a file is not page-aligned in total +# length, as much as can be read using O_DIRECT is done so and +# the rest is read using the ARC. O_DIRECT requires page-size alignment. +# +# STRATEGY: +# 1. Write a file that is page-aligned (buffered) +# 2. Truncate the file to be 512 bytes less +# 3. Export then import the Zpool flushing out the ARC +# 4. 
Read back the file using O_DIRECT +# 5. Verify the file is read back with both Direct I/O and buffered I/O +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$filename" + log_must zfs set recordsize=$rs $TESTPOOL/$TESTFS +} + +log_assert "Verify Direct I/O reads can read an entire file that is not \ + page-aligned" + +log_onexit cleanup + +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +rs=$(get_prop recordsize $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +bs=$((128 * 1024)) # bs=recordsize (128k) +filename="$mntpnt/testfile.iso" + +log_must stride_dd -i /dev/urandom -o $filename -b $bs -c 2 +# Truncating file so the total length is no longer page-size aligned +log_must do_truncate_reduce $filename 512 + +# Export and import the Zpool to flush the ARC before reading the file back +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL + +# Reading the file back using Direct I/O +prev_dio_read=$(get_iostats_stat $TESTPOOL direct_read_count) +prev_arc_read=$(get_iostats_stat $TESTPOOL arc_read_count) +log_must stride_dd -i $filename -o /dev/null -b $bs -e -d +curr_dio_read=$(get_iostats_stat $TESTPOOL direct_read_count) +curr_arc_read=$(get_iostats_stat $TESTPOOL arc_read_count) +total_dio_read=$((curr_dio_read - prev_dio_read)) +total_arc_read=$((curr_arc_read - prev_arc_read)) + +# We should see both Direct I/O reads and an ARC read to read the entire file +# that is not page-size aligned +if [[ $total_dio_read -lt 2 ]] || [[ $total_arc_read -lt 1 ]]; then + log_fail "Expect 2 reads from Direct I/O and 1 from the ARC but \ + Direct I/O: $total_dio_read ARC: $total_arc_read" +fi + +log_pass "Verified Direct I/O read can read a non-page-aligned length file" diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh new file mode 100755 index 000000000000..efc9ee639184 --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/direct/dio_write_stable_pages.ksh @@ -0,0 +1,103 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify stable pages work for O_DIRECT writes. +# +# STRATEGY: +# 1. Start a Direct I/O write workload while manipulating the user +# buffer. +# 2. Verify we can Read the contents of the file using buffered reads. +# 3. Verify there is no checksum errors reported from zpool status. +# 4. Repeat steps 1 and 2 for 3 iterations. +# 5. Repeat 1-3 but with compression disabled. +# + +verify_runnable "global" + +function cleanup +{ + log_must rm -f "$mntpnt/direct-write.iso" + check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 +} + +log_assert "Verify stable pages work for Direct I/O writes." 
+ +if is_linux; then + log_unsupported "Linux does not support stable pages for O_DIRECT \ + writes" +fi + +log_onexit cleanup + +ITERATIONS=3 +NUMBLOCKS=300 +BS=$((128 * 1024)) #128k +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS + +for compress in "on" "off"; +do + log_must zfs set compression=$compress $TESTPOOL/$TESTFS + + for i in $(seq 1 $ITERATIONS); do + log_note "Verifying stable pages for Direct I/O writes \ + iteration $i of $ITERATIONS" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + + # Manipulate the user's buffer while running O_DIRECT write + # workload with the buffer. + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS + + # Reading back the contents of the file + log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \ + -b $BS -c $NUMBLOCKS + + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + + # Making sure there are no data errors for the zpool + log_note "Making sure there are no checksum errors with the ZPool" + log_must check_pool_status $TESTPOOL "errors" \ + "No known data errors" + + log_must rm -f "$mntpnt/direct-write.iso" + done +done + +log_pass "Verified stable pages work for Direct I/O writes." diff --git a/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh new file mode 100755 index 000000000000..536459a35e6c --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/dio_write_verify.ksh @@ -0,0 +1,196 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2022 by Triad National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/direct/dio.cfg +. $STF_SUITE/tests/functional/direct/dio.kshlib + +# +# DESCRIPTION: +# Verify checksum verify works for Direct I/O writes. +# +# STRATEGY: +# 1. Set the module parameter zfs_vdev_direct_write_verify to 0. +# 2. Check that manipulating the user buffer while Direct I/O writes are +# taking place does not cause any panics with compression turned on. +# 3. Start a Direct I/O write workload while manipulating the user buffer +# without compression. +# 4. Verify there are Direct I/O write verify failures using +# zpool status -d and checking for zevents. We also make sure there +# are reported data errors when reading the file back. +# 5. Repeat steps 3 and 4 for 3 iterations. +# 6. Set zfs_vdev_direct_write_verify to 1 and repeat 3. +# 7. Verify there are Direct I/O write verify failures using +# zpool status -d and checking for zevents. We also make sure there +# are no reported data errors when reading the file back because +# every Direct I/O write is checked and, on checksum validation +# failure, the write is not committed to a VDEV.
+# + +verify_runnable "global" + +function cleanup +{ + # Clearing out DIO counts for Zpool + log_must zpool clear $TESTPOOL + # Clearing out dio_verify from event logs + log_must zpool events -c + log_must set_tunable32 VDEV_DIRECT_WR_VERIFY $DIO_WR_VERIFY_TUNABLE +} + +log_assert "Verify checksum verify works for Direct I/O writes." + +if is_freebsd; then + log_unsupported "FreeBSD is capable of stable pages for O_DIRECT writes" +fi + +log_onexit cleanup + +ITERATIONS=3 +NUMBLOCKS=300 +BS=$((128 * 1024)) # 128k +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) +typeset DIO_WR_VERIFY_TUNABLE=$(get_tunable VDEV_DIRECT_WR_VERIFY) + +# Get a list of vdevs in our pool +set -A array $(get_disklist_fullpath $TESTPOOL) + +# Get the first vdev +firstvdev=${array[0]} + +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0 + +# First we will verify there are no panics while manipulating the contents of +# the user buffer during Direct I/O writes with compression. The contents +# will always be copied out of the ABD and there should never be any ABD ASSERT +# failures +log_note "Verifying no panics for Direct I/O writes with compression" +log_must zfs set compression=on $TESTPOOL/$TESTFS +prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) +log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" -n $NUMBLOCKS \ + -b $BS +curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) +total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + +log_note "Making sure we have Direct I/O writes logged" +if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" +fi + +# Clearing out DIO counts for Zpool +log_must zpool clear $TESTPOOL +# Clearing out dio_verify from event logs +log_must zpool events -c +log_must rm -f "$mntpnt/direct-write.iso" + +# Next we will verify there are checksum errors for Direct I/O writes while +# manipulating the contents of the user pages. 
+log_must zfs set compression=off $TESTPOOL/$TESTFS + +for i in $(seq 1 $ITERATIONS); do + log_note "Verifying Direct I/O write checksums iteration \ + $i of $ITERATIONS with zfs_vdev_direct_write_verify=0" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS + + # Reading file back to verify checksum errors + filesize=$(get_file_size "$mntpnt/direct-write.iso") + num_blocks=$((filesize / BS)) + log_mustnot stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \ + -c $num_blocks + + # Getting new Direct I/O write count. + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + # Verifying there are checksum errors + log_note "Making sure there are checksum errors for the ZPool" + cksum=$(zpool status -P -v $TESTPOOL | awk -v v="$firstvdev" '$0 ~ v \ + {print $5}') + if [[ $cksum -eq 0 ]]; then + zpool status -P -v $TESTPOOL + log_fail "No checksum failures for ZPool $TESTPOOL" + fi + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + log_note "Making sure we have no Direct I/O write checksum verifies \ + with ZPool" + check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0 + + log_must rm -f "$mntpnt/direct-write.iso" +done + +log_must zpool status -v $TESTPOOL +log_must zpool sync $TESTPOOL + + + +# Finally we will verify that with checking every Direct I/O write we have no +# errors at all.
+# Create the file before trying to manipulate the contents +log_must file_write -o create -f "$mntpnt/direct-write.iso" -b $BS \ + -c $NUMBLOCKS -w +log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 1 + +for i in $(seq 1 $ITERATIONS); do + log_note "Verifying every Direct I/O write checksums iteration $i of \ + $ITERATIONS with zfs_vdev_direct_write_verify=1" + + prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \ + -n $NUMBLOCKS -b $BS -e + + # Reading file back to verify there are no checksum errors + filesize=$(get_file_size "$mntpnt/direct-write.iso") + num_blocks=$((filesize / BS)) + log_must stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \ + -c $num_blocks + + # Getting new Direct I/O write count. + curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count) + total_dio_wr=$((curr_dio_wr - prev_dio_wr)) + + log_note "Making sure there are no checksum errors with the ZPool" + log_must check_pool_status $TESTPOOL "errors" "No known data errors" + + log_note "Making sure we have Direct I/O writes logged" + if [[ $total_dio_wr -lt 1 ]]; then + log_fail "No Direct I/O writes $total_dio_wr" + fi + + log_note "Making sure we have Direct I/O write checksum verifies with ZPool" + check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1 +done + +log_must rm -f "$mntpnt/direct-write.iso" + +log_pass "Verified checksum verify works for Direct I/O writes." diff --git a/tests/zfs-tests/tests/functional/direct/setup.ksh b/tests/zfs-tests/tests/functional/direct/setup.ksh new file mode 100755 index 000000000000..f66d6531c1db --- /dev/null +++ b/tests/zfs-tests/tests/functional/direct/setup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License.
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2021 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +verify_runnable "global" + +if tunable_exists DIO_ENABLED ; then + log_must save_tunable DIO_ENABLED + log_must set_tunable32 DIO_ENABLED 1 +fi + +default_raidz_setup_noexit "$DISKS" +log_must zfs set compression=off $TESTPOOL/$TESTFS +log_pass diff --git a/tests/zfs-tests/tests/functional/io/setup.ksh b/tests/zfs-tests/tests/functional/io/setup.ksh index 82aaf5bc91b5..29d267115891 100755 --- a/tests/zfs-tests/tests/functional/io/setup.ksh +++ b/tests/zfs-tests/tests/functional/io/setup.ksh @@ -27,5 +27,5 @@ . 
$STF_SUITE/include/libtest.shlib verify_runnable "global" -default_setup "$DISKS" +default_raidz_setup "$DISKS" log_must zfs set compression=on $TESTPOOL/$TESTFS diff --git a/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg b/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg index 0302392f4c7f..f79123e5b2e1 100644 --- a/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg +++ b/tests/zfs-tests/tests/functional/l2arc/l2arc.cfg @@ -35,4 +35,4 @@ export PERF_COMPPERCENT=66 export PERF_COMPCHUNK=0 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib index 26e7c2cc25bc..80badd27331a 100644 --- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -155,13 +155,6 @@ function cleanup_pool fi } -function cmp_md5s { - typeset file1=$1 - typeset file2=$2 - - [ "$(md5digest $file1)" = "$(md5digest $file2)" ] -} - # # Detect if the given two filesystems have same sub-datasets # diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 8f3585a5997f..deb963f25894 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -77,6 +77,14 @@ log_must zfs create $TESTPOOL/$TESTFS log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ conv=fdatasync,fsync bs=1 count=1 +# +# Create a small file for the O_DIRECT test before freezing the pool. This +# allows us to overwrite it after the pool is frozen and avoid the case +# where O_DIRECT is disabled because the first block must be grown. +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/direct \ + oflag=sync,direct bs=4k count=1 + # # 2. 
Freeze TESTFS # @@ -140,6 +148,10 @@ log_must truncate -s 0 /$TESTPOOL/$TESTFS/truncated_file log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/large \ oflag=sync bs=128k count=64 +# TX_WRITE (O_DIRECT) +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/direct \ + oflag=sync,direct bs=4k count=1 + # Write zeros, which compress to holes, in the middle of a file log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/holes.1 \ oflag=sync bs=128k count=8 diff --git a/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh index a93d0b3cc803..62563e0dd4cb 100755 --- a/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh +++ b/tests/zfs-tests/tests/functional/trim/trim_l2arc.ksh @@ -77,7 +77,7 @@ export PERF_COMPCHUNK=0 export RUNTIME=30 export BLOCKSIZE=128K export SYNC_TYPE=0 -export DIRECT=1 +export DIRECT=0 # Write to the pool. log_must fio $FIO_SCRIPTS/mkfiles.fio