From 136ca1de2cd42cef653293f75559223afc4cc636 Mon Sep 17 00:00:00 2001 From: Gionatan Danti Date: Sun, 3 Sep 2023 12:17:43 +0200 Subject: [PATCH] add prefetch property ZFS prefetch is currently governed by the zfs_prefetch_disable tunable. However, this is a module-wide settings - if a specific dataset benefits from prefetch, while others have issue with it, an optimal solution does not exists. This commit introduce the "prefetch" tri-state property, which enable granular control (at dataset/volume level) for prefetching. This patch does not remove the zfs_prefetch_disable, which reimains a system-wide switch for enable/disable prefetch. However, to avoid duplication, it would be preferable to deprecate and then remove the module tunable. Signed-off-by: Gionatan Danti --- include/sys/dmu_objset.h | 1 + include/sys/fs/zfs.h | 7 +++++++ lib/libzfs/libzfs.abi | 3 ++- man/man7/zfsprops.7 | 17 +++++++++++++++++ module/zcommon/zfs_prop.c | 11 +++++++++++ module/zfs/dmu_objset.c | 19 +++++++++++++++++++ module/zfs/dmu_zfetch.c | 7 ++++++- 7 files changed, 63 insertions(+), 2 deletions(-) diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 9f6e0fdd601b..a9123e862af7 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -132,6 +132,7 @@ struct objset { zfs_logbias_op_t os_logbias; zfs_cache_type_t os_primary_cache; zfs_cache_type_t os_secondary_cache; + zfs_prefetch_type_t os_prefetch; zfs_sync_type_t os_sync; zfs_redundant_metadata_type_t os_redundant_metadata; uint64_t os_recordsize; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index bc940e8a7929..9d7eca9784b6 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -191,6 +191,7 @@ typedef enum { ZFS_PROP_REDACTED, ZFS_PROP_REDACT_SNAPS, ZFS_PROP_SNAPSHOTS_CHANGED, + ZFS_PROP_PREFETCH, ZFS_NUM_PROPS } zfs_prop_t; @@ -543,6 +544,12 @@ typedef enum zfs_key_location { ZFS_KEYLOCATION_LOCATIONS } zfs_keylocation_t; +typedef enum { + ZFS_PREFETCH_NONE = 0, + ZFS_PREFETCH_METADATA = 1, + ZFS_PREFETCH_ALL = 2 +} zfs_prefetch_type_t; + #define DEFAULT_PBKDF2_ITERATIONS 350000 #define MIN_PBKDF2_ITERATIONS 100000 diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index c1ce3d0f67d8..083c27be7d31 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -1867,7 +1867,8 @@ - + + diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index e3674b1f8a8d..59f6404379af 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1613,6 +1613,23 @@ If this property is set to then only metadata is cached. The default value is .Sy all . +.It Sy prefetch Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata +Controls what speculative prefetch does. +If this property is set to +.Sy all , +then both user data and metadata are prefetched. +If this property is set to +.Sy none , +then neither user data nor metadata are prefetched. +If this property is set to +.Sy metadata , +then only metadata are prefetched. +The default value is +.Sy all . +.Pp +Please note that the module parameter zfs_disable_prefetch=1 can +be used to totally disable speculative prefetch, bypassing anything +this property does. .It Sy setuid Ns = Ns Sy on Ns | Ns Sy off Controls whether the setuid bit is respected for the file system. The default value is diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 3db6fd13f4ae..29764674a31b 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -345,6 +345,13 @@ zfs_prop_init(void) { NULL } }; + static const zprop_index_t prefetch_table[] = { + { "none", ZFS_PREFETCH_NONE }, + { "metadata", ZFS_PREFETCH_METADATA }, + { "all", ZFS_PREFETCH_ALL }, + { NULL } + }; + static const zprop_index_t sync_table[] = { { "standard", ZFS_SYNC_STANDARD }, { "always", ZFS_SYNC_ALWAYS }, @@ -453,6 +460,10 @@ zfs_prop_init(void) ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table, sfeatures); + zprop_register_index(ZFS_PROP_PREFETCH, "prefetch", + ZFS_PREFETCH_ALL, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, + "none | metadata | all", "PREFETCH", prefetch_table, sfeatures); zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "latency | throughput", "LOGBIAS", logbias_table, sfeatures); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index d134d4958f7c..76e65b5506a9 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -263,6 +263,19 @@ secondary_cache_changed_cb(void *arg, uint64_t newval) os->os_secondary_cache = newval; } +static void +prefetch_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + /* + * Inheritance should have been done by now. + */ + ASSERT(newval == ZFS_PREFETCH_ALL || newval == ZFS_PREFETCH_NONE || + newval == ZFS_PREFETCH_METADATA); + os->os_prefetch = newval; +} + static void sync_changed_cb(void *arg, uint64_t newval) { @@ -562,6 +575,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_PREFETCH), + prefetch_changed_cb, os); + } if (!ds->ds_is_snapshot) { if (err == 0) { err = dsl_prop_register(ds, @@ -635,6 +653,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; os->os_dnodesize = DNODE_MIN_SIZE; + os->os_prefetch = ZFS_PREFETCH_ALL; } if (ds == NULL || !ds->ds_is_snapshot) diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index d0acaf502066..2b2d72671001 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -329,9 +329,14 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, { zstream_t *zs; spa_t *spa = zf->zf_dnode->dn_objset->os_spa; + zfs_prefetch_type_t os_prefetch = zf->zf_dnode->dn_objset->os_prefetch; - if (zfs_prefetch_disable) + if (zfs_prefetch_disable || os_prefetch == ZFS_PREFETCH_NONE) return (NULL); + + if (os_prefetch == ZFS_PREFETCH_METADATA) + fetch_data = B_FALSE; + /* * If we haven't yet loaded the indirect vdevs' mappings, we * can only read from blocks that we carefully ensure are on