Skip to content

Commit

Permalink
Default zfs_max_recordsize to 16M
Browse files Browse the repository at this point in the history
Increase the default allowed maximum recordsize from 1M to 16M.
As described in the zfs(4) man page, there are significant costs
which need to be considered before using very large blocks.
However, there are scenarios where they make good sense and
it should no longer be necessary to artificially restrict their
use behind a module option.

Note that for 32-bit platforms we continue to leave this
restriction in place due to the limited virtual address space
available (256-512MB).  On these systems only a handful
of blocks could be cached at any one time, severely impacting
performance and potentially stability.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Closes openzfs#12830
Closes openzfs#13302
  • Loading branch information
rincebrain authored Apr 28, 2022
1 parent 63b18e4 commit f2330bd
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 28 deletions.
6 changes: 3 additions & 3 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -1469,15 +1469,15 @@ feature uses to estimate incoming log blocks.
.It Sy zfs_max_logsm_summary_length Ns = Ns Sy 10 Pq ulong
Maximum number of rows allowed in the summary of the spacemap log.
.
.It Sy zfs_max_recordsize Ns = Ns Sy 1048576 Po 1MB Pc Pq int
.It Sy zfs_max_recordsize Ns = Ns Sy 16777216 Po 16MB Pc Pq int
We currently support block sizes from
.Em 512B No to Em 16MB .
The benefits of larger blocks, and thus larger I/O,
need to be weighed against the cost of COWing a giant block to modify one byte.
Additionally, very large blocks can have an impact on I/O latency,
and also potentially on the memory allocator.
Therefore, we do not allow the recordsize to be set larger than this tunable.
Larger blocks can be created by changing it,
Therefore, we formerly forbade creating blocks larger than 1M.
Larger blocks could be created by changing this tunable,
and pools with larger blocks can always be imported and used,
regardless of this setting.
.
Expand Down
25 changes: 13 additions & 12 deletions module/zfs/dsl_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,19 @@
* The SPA supports block sizes up to 16MB. However, very large blocks
* can have an impact on i/o latency (e.g. tying up a spinning disk for
* ~300ms), and also potentially on the memory allocator. Therefore,
* we do not allow the recordsize to be set larger than zfs_max_recordsize
* (default 1MB). Larger blocks can be created by changing this tunable,
* and pools with larger blocks can always be imported and used, regardless
* of this setting.
* we did not allow the recordsize to be set larger than zfs_max_recordsize
* (former default: 1MB). Larger blocks could be created by changing this
* tunable, and pools with larger blocks can always be imported and used,
* regardless of this setting.
*
* We do, however, still limit it by default to 1M on 32-bit (_ILP32)
* platforms, because their limited address space (e.g. Linux's 3/1 memory
* split on x86_32) doesn't leave much room for 16M chunks.
*/
int zfs_max_recordsize = 1 * 1024 * 1024;
#ifdef _ILP32
int zfs_max_recordsize = 1 * 1024 * 1024;
#else
int zfs_max_recordsize = 16 * 1024 * 1024;
#endif
static int zfs_allow_redacted_dataset_mount = 0;

#define SWITCH64(x, y) \
Expand Down Expand Up @@ -4964,13 +4971,7 @@ dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
return (0);
}

#if defined(_LP64)
#define RECORDSIZE_PERM ZMOD_RW
#else
/* Limited to 1M on 32-bit platforms due to lack of virtual address space */
#define RECORDSIZE_PERM ZMOD_RD
#endif
ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, RECORDSIZE_PERM,
ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
"Max allowed record size");

ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
Expand Down
9 changes: 0 additions & 9 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,6 @@ zio_init(void)
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;

#if defined(_ILP32) && defined(_KERNEL)
/*
* Cache size limited to 1M on 32-bit platforms until ARC
* buffers no longer require virtual address space.
*/
if (size > zfs_max_recordsize)
break;
#endif

while (!ISP2(p2))
p2 &= p2 - 1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ log_must disk_setup
log_must zpool create $TESTPOOL raidz $ZPOOL_DISKS special mirror \
$CLASS_DISK0 $CLASS_DISK1

for value in 256 1025 2097152
for value in 256 1025 33554432
do
log_mustnot zfs set special_small_blocks=$value $TESTPOOL
done
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ set -A args "ab" "-?" "-cV" "-Vc" "-c -V" "c" "V" "--c" "-e" "-s" \
"-blah" "-cV 12k" "-s -cV 1P" "-sc" "-Vs 5g" "-o" "--o" "-O" "--O" \
"-o QuOta=none" "-o quota=non" "-o quota=abcd" "-o quota=0" "-o quota=" \
"-o ResErVaTi0n=none" "-o reserV=none" "-o reservation=abcd" "-o reserv=" \
"-o recorDSize=64k" "-o recordsize=2048K" "-o recordsize=2M" \
"-o recorDSize=64k" "-o recordsize=32768K" "-o recordsize=32M" \
"-o recordsize=256" "-o recsize=" "-o recsize=zero" "-o recordsize=0" \
"-o mountPoint=/tmp/tmpfile$$" "-o mountpoint=non0" "-o mountpoint=" \
"-o mountpoint=LEGACY" "-o mounpoint=none" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ while (( i < ${#dataset[@]} )); do
(( j += 1 ))
done
# Additional recordsize
set_n_check_prop "2048K" "recordsize" "${dataset[i]}" false
set_n_check_prop "32768K" "recordsize" "${dataset[i]}" false
set_n_check_prop "128B" "recordsize" "${dataset[i]}" false
(( i += 1 ))
done
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ log_onexit cleanup

set -A args "QuOta=none" "quota=non" "quota=abcd" "quota=0" "quota=" \
"ResErVaTi0n=none" "reserV=none" "reservation=abcd" "reserv=" \
"recorDSize=64k" "recordsize=2M" "recordsize=2048K" \
"recorDSize=64k" "recordsize=32M" "recordsize=32768K" \
"recordsize=256" "recsize=" "recsize=zero" "recordsize=0" \
"mountPoint=/tmp/tmpfile$$" "mountpoint=non0" "mountpoint=" \
"mountpoint=LEGACY" "mounpoint=none" \
Expand Down

0 comments on commit f2330bd

Please sign in to comment.