Skip to content

Commit

Permalink
Optimize small random numbers generation
Browse files Browse the repository at this point in the history
In all places except two spa_get_random() is used for small values,
and the consumers do not require well seeded high quality values.
Switch those two exceptions directly to random_get_pseudo_bytes()
and optimize spa_get_random(), renaming it to random_in_range(),
since it is not related to SPA or ZFS in general.

On FreeBSD, map random_in_range() directly to the new prng32_bounded() KPI
added in FreeBSD 13.  On Linux and in user space, just reduce the type
used to uint32_t to avoid the more expensive 64-bit division.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12183
  • Loading branch information
amotin authored Jun 22, 2021
1 parent ba91311 commit 29274c9
Show file tree
Hide file tree
Showing 16 changed files with 83 additions and 45 deletions.
22 changes: 22 additions & 0 deletions include/os/freebsd/spl/sys/random.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#define _OPENSOLARIS_SYS_RANDOM_H_

#include_next <sys/random.h>
#if __FreeBSD_version >= 1300108
#include <sys/prng.h>
#endif

static inline int
random_get_bytes(uint8_t *p, size_t s)
Expand All @@ -45,4 +48,23 @@ random_get_pseudo_bytes(uint8_t *p, size_t s)
return (0);
}

/*
 * Return a pseudo-random 32-bit value bounded by `range`.
 *
 * On FreeBSD versions that ship <sys/prng.h> (__FreeBSD_version >= 1300108)
 * the request is handed straight to prng32_bounded().  Older kernels use the
 * local fallback below, which draws 32 pseudo-random bits and reduces them
 * modulo `range`, yielding a value in [0, range).
 *
 * In the fallback path `range` must be non-zero (asserted); a range of 1
 * short-circuits to 0 without consuming any random bytes.
 */
static inline uint32_t
random_in_range(uint32_t range)
{
#if __FreeBSD_version < 1300108
	uint32_t value;

	ASSERT(range != 0);

	if (range != 1) {
		/* Result of the byte fetch is deliberately ignored. */
		(void) random_get_pseudo_bytes((void *)&value, sizeof (value));
		return (value % range);
	}

	/* Only one possible outcome; skip the random draw entirely. */
	return (0);
#else
	return (prng32_bounded(range));
#endif
}

#endif /* !_OPENSOLARIS_SYS_RANDOM_H_ */
15 changes: 15 additions & 0 deletions include/os/linux/spl/sys/random.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,19 @@ random_get_bytes(uint8_t *ptr, size_t len)

extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);

/*
 * Return a pseudo-random value in [0, range).
 *
 * Draws 32 bits from random_get_pseudo_bytes() and reduces them modulo
 * `range`.  `range` must be non-zero (asserted); a range of 1 returns 0
 * immediately without fetching any random bytes.
 */
static __inline__ uint32_t
random_in_range(uint32_t range)
{
	uint32_t value;

	ASSERT(range != 0);

	if (range != 1) {
		/* Result of the byte fetch is deliberately ignored. */
		(void) random_get_pseudo_bytes((void *)&value, sizeof (value));
		return (value % range);
	}

	/* Only one possible outcome; skip the random draw entirely. */
	return (0);
}

#endif /* _SPL_RANDOM_H */
1 change: 0 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,6 @@ extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern uint64_t spa_generate_guid(spa_t *spa);
extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
Expand Down
15 changes: 15 additions & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,21 @@ extern int lowbit64(uint64_t i);
extern int random_get_bytes(uint8_t *ptr, size_t len);
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);

/*
 * Return a pseudo-random value in [0, range).
 *
 * Draws 32 bits from random_get_pseudo_bytes() and reduces them modulo
 * `range`.  `range` must be non-zero (asserted); a range of 1 returns 0
 * immediately without fetching any random bytes.
 */
static __inline__ uint32_t
random_in_range(uint32_t range)
{
	uint32_t value;

	ASSERT(range != 0);

	if (range != 1) {
		/* Result of the byte fetch is deliberately ignored. */
		(void) random_get_pseudo_bytes((void *)&value, sizeof (value));
		return (value % range);
	}

	/* Only one possible outcome; skip the random draw entirely. */
	return (0);
}

extern void kernel_init(int mode);
extern void kernel_fini(void);
extern void random_init(void);
Expand Down
4 changes: 2 additions & 2 deletions module/os/linux/zfs/arc_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ arc_available_memory(void)
int64_t lowest = INT64_MAX;

/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
if (random_in_range(100) == 0)
lowest = -1024;

return (lowest);
Expand All @@ -458,7 +458,7 @@ arc_all_memory(void)
uint64_t
arc_free_memory(void)
{
return (spa_get_random(arc_all_memory() * 20 / 100));
return (random_in_range(arc_all_memory() * 20 / 100));

This comment has been minimized.

Copy link
@mmatuska

mmatuska Jun 23, 2021

Contributor

Is it safe to work with a uint32_t here?

This comment has been minimized.

Copy link
@amotin

amotin Jun 23, 2021

Author Member

It is probably not good, but it is weird to return a random value every time it is called. Not sure how ZFS is supposed to behave after that, even just for testing.

}

void
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -5070,7 +5070,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
* damage can result in extremely long reconstruction times. This
* will also test spilling from special to normal.
*/
if (psize >= metaslab_force_ganging && (spa_get_random(100) < 3)) {
if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) {
metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
allocator);
return (SET_ERROR(ENOSPC));
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/mmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,9 @@ mmp_write_uberblock(spa_t *spa)
mutex_exit(&mmp->mmp_io_lock);

offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) -
MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL));
MMP_BLOCKS_PER_LABEL + random_in_range(MMP_BLOCKS_PER_LABEL));

label = spa_get_random(VDEV_LABELS);
label = random_in_range(VDEV_LABELS);
vdev_label_write(zio, vd, label, ub_abd, offset,
VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp,
flags | ZIO_FLAG_DONT_PROPAGATE);
Expand Down
5 changes: 1 addition & 4 deletions module/zfs/multilist.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
#include <sys/multilist.h>
#include <sys/trace_zfs.h>

/* needed for spa_get_random() */
#include <sys/spa.h>

/*
* This overrides the number of sublists in each multilist_t, which defaults
* to the number of CPUs in the system (see multilist_create()).
Expand Down Expand Up @@ -275,7 +272,7 @@ multilist_get_num_sublists(multilist_t *ml)
unsigned int
multilist_get_random_index(multilist_t *ml)
{
return (spa_get_random(ml->ml_num_sublists));
return (random_in_range(ml->ml_num_sublists));
}

/* Lock and return the sublist specified at the given index */
Expand Down
6 changes: 3 additions & 3 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -3175,7 +3175,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
import_delay = spa_activity_check_duration(spa, ub);

/* Add a small random factor in case of simultaneous imports (0-25%) */
import_delay += import_delay * spa_get_random(250) / 1000;
import_delay += import_delay * random_in_range(250) / 1000;

import_expire = gethrtime() + import_delay;

Expand Down Expand Up @@ -4619,7 +4619,7 @@ spa_ld_checkpoint_rewind(spa_t *spa)
vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL };
int svdcount = 0;
int children = rvd->vdev_children;
int c0 = spa_get_random(children);
int c0 = random_in_range(children);

for (int c = 0; c < children; c++) {
vdev_t *vd = rvd->vdev_child[(c0 + c) % children];
Expand Down Expand Up @@ -9111,7 +9111,7 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx)
vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL };
int svdcount = 0;
int children = rvd->vdev_children;
int c0 = spa_get_random(children);
int c0 = random_in_range(children);

for (int c = 0; c < children; c++) {
vdev_t *vd =
Expand Down
30 changes: 9 additions & 21 deletions module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1494,32 +1494,21 @@ spa_strfree(char *s)
kmem_free(s, strlen(s) + 1);
}

uint64_t
spa_get_random(uint64_t range)
{
uint64_t r;

ASSERT(range != 0);

if (range == 1)
return (0);

(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));

return (r % range);
}

uint64_t
spa_generate_guid(spa_t *spa)
{
uint64_t guid = spa_get_random(-1ULL);
uint64_t guid;

if (spa != NULL) {
while (guid == 0 || spa_guid_exists(spa_guid(spa), guid))
guid = spa_get_random(-1ULL);
do {
(void) random_get_pseudo_bytes((void *)&guid,
sizeof (guid));
} while (guid == 0 || spa_guid_exists(spa_guid(spa), guid));
} else {
while (guid == 0 || spa_guid_exists(guid, 0))
guid = spa_get_random(-1ULL);
do {
(void) random_get_pseudo_bytes((void *)&guid,
sizeof (guid));
} while (guid == 0 || spa_guid_exists(guid, 0));
}

return (guid);
Expand Down Expand Up @@ -2888,7 +2877,6 @@ EXPORT_SYMBOL(spa_maxdnodesize);
EXPORT_SYMBOL(spa_guid_exists);
EXPORT_SYMBOL(spa_strdup);
EXPORT_SYMBOL(spa_strfree);
EXPORT_SYMBOL(spa_get_random);
EXPORT_SYMBOL(spa_generate_guid);
EXPORT_SYMBOL(snprintf_blkptr);
EXPORT_SYMBOL(spa_freeze);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/space_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
length > SM_RUN_MAX ||
vdev_id != SM_NO_VDEVID ||
(zfs_force_some_double_word_sm_entries &&
spa_get_random(100) == 0)))
random_in_range(100) == 0)))
words = 2;

space_map_write_seg(sm, rs_get_start(rs, rt), rs_get_end(rs,
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/vdev_indirect.c
Original file line number Diff line number Diff line change
Expand Up @@ -1572,7 +1572,7 @@ vdev_indirect_splits_enumerate_randomly(indirect_vsd_t *iv, zio_t *zio)
indirect_child_t *ic = list_head(&is->is_unique_child);
int children = is->is_unique_children;

for (int i = spa_get_random(children); i > 0; i--)
for (int i = random_in_range(children); i > 0; i--)
ic = list_next(&is->is_unique_child, ic);

ASSERT3P(ic, !=, NULL);
Expand Down Expand Up @@ -1736,7 +1736,7 @@ vdev_indirect_reconstruct_io_done(zio_t *zio)
* Known_good will be TRUE when reconstruction is known to be possible.
*/
if (zfs_reconstruct_indirect_damage_fraction != 0 &&
spa_get_random(zfs_reconstruct_indirect_damage_fraction) == 0)
random_in_range(zfs_reconstruct_indirect_damage_fraction) == 0)
known_good = (vdev_indirect_splits_damage(iv, zio) == 0);

/*
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/vdev_mirror.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ vdev_mirror_preferred_child_randomize(zio_t *zio)
int p;

if (mm->mm_root) {
p = spa_get_random(mm->mm_preferred_cnt);
p = random_in_range(mm->mm_preferred_cnt);
return (vdev_mirror_dva_select(zio, p));
}

Expand Down
6 changes: 4 additions & 2 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
{
zio_cksum_t *zc = &bp->blk_cksum;

zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL);
zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL);
(void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_0],
sizeof (zc->zc_word[ZIL_ZC_GUID_0]));
(void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_1],
sizeof (zc->zc_word[ZIL_ZC_GUID_1]));
zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os);
zc->zc_word[ZIL_ZC_SEQ] = 1ULL;
}
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zio_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
* in non-ECC RAM), we handle this error (and test it).
*/
if (zio_decompress_fail_fraction != 0 &&
spa_get_random(zio_decompress_fail_fraction) == 0)
random_in_range(zio_decompress_fail_fraction) == 0)
ret = SET_ERROR(EINVAL);

return (ret);
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/zio_inject.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ freq_triggered(uint32_t frequency)
*/
uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;

return (spa_get_random(maximum) < frequency);
return (random_in_range(maximum) < frequency);
}

/*
Expand Down Expand Up @@ -347,12 +347,12 @@ zio_inject_bitflip_cb(void *data, size_t len, void *private)
{
zio_t *zio __maybe_unused = private;
uint8_t *buffer = data;
uint_t byte = spa_get_random(len);
uint_t byte = random_in_range(len);

ASSERT(zio->io_type == ZIO_TYPE_READ);

/* flip a single random bit in an abd data buffer */
buffer[byte] ^= 1 << spa_get_random(8);
buffer[byte] ^= 1 << random_in_range(8);

return (1); /* stop after first flip */
}
Expand Down Expand Up @@ -493,7 +493,7 @@ zio_handle_ignored_writes(zio_t *zio)
}

/* Have a "problem" writing 60% of the time */
if (spa_get_random(100) < 60)
if (random_in_range(100) < 60)
zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
break;
}
Expand Down

0 comments on commit 29274c9

Please sign in to comment.