Skip to content

Commit

Permalink
fix abd, have taskq_wait_synced() wait for threads to be created
Browse files Browse the repository at this point in the history
taskq_wait_synced() did a VERIFY() on whether the taskq's
threads were the requested number, but taskq_create() can
ultimately return early because taskq_thread_create() is
allowed to return when two desired threads are created.

Fix this race panic.  Also, taskq_wait_synced() may fail if
if num_ecores is nonzero (on Apple Silicon), so create a
flag that lets taskq_create_common() deal with the
max_ncpus.

Make boot_ncpus a variable that's MAX(1, (int)max_ncores - num_ecores).
boot_ncpus is used in common code.

Modify the alignments and quanta/import sizes of the abd
kmem and vmem cache creations.  Make DEBUG builds work
with KMF_LITE | KMF_BUFCTL on the abd kmem caches.

Signed-off-by: Sean Doran <smd@use.net>
  • Loading branch information
rottegift committed Dec 6, 2023
1 parent 33ad46e commit a72d543
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 23 deletions.
2 changes: 1 addition & 1 deletion include/os/macos/spl/sys/sysmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ extern "C" {
#define is_system_labeled() 0

extern unsigned int max_ncpus;
#define boot_ncpus max_ncpus
extern unsigned int boot_ncpus;
extern unsigned int num_ecores;

#ifndef RLIM64_INFINITY
Expand Down
1 change: 1 addition & 0 deletions include/os/macos/spl/sys/taskq.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct taskq_ent;
#ifdef __APPLE__
#define TASKQ_TIMESHARE 0x0020 /* macOS dynamic thread priority */
#define TASKQ_REALLY_DYNAMIC 0x0040 /* don't filter out TASKQ_DYNAMIC */
#define TASKQ_CREATE_SYNCED 0x0080 /* don't deflate ncpus */
#endif

/*
Expand Down
4 changes: 4 additions & 0 deletions module/os/macos/spl/spl-osx.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
static utsname_t utsname_static = { { 0 } };

unsigned int max_ncpus = 0;
unsigned int boot_ncpus = 0;
unsigned int num_ecores = 0;
uint64_t total_memory = 0;
uint64_t real_total_memory = 0;
Expand Down Expand Up @@ -495,6 +496,9 @@ spl_start(kmod_info_t *ki, void *d)

#if defined(__arm64__)
num_ecores = (max_ncpus > 4) ? 4 : 0;
boot_ncpus = MAX(1, (int)max_ncpus - (int)num_ecores);
#else
boot_ncpus = max_ncpus;
#endif

/*
Expand Down
32 changes: 15 additions & 17 deletions module/os/macos/spl/spl-seg_kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,31 +281,27 @@ segkmem_abd_init()
/*
* OpenZFS does not segregate the abd kmem cache out of the general
* heap, leading to large numbers of short-lived slabs exchanged
* between the kmem cache and it's parent. XNU absorbs this with a
* qcache, following its history of absorbing the pre-ABD zio file and
* metadata caches being qcached (which raises the exchanges with the
* general heap from PAGESIZE to 256k).
* between the kmem cache and its parent. XNU absorbs this with a a
* large minimum request to the parent vmem_caches on large-memory
* MacOS systems.
*/

extern vmem_t *spl_heap_arena;

#define BIG_SLAB 131072
#ifdef __arm64__
#define BIG_BIG_SLAB (BIG_SLAB * 2)
#else
#define BIG_BIG_SLAB BIG_SLAB
#endif
#define BIG_SLAB (PAGESIZE * 16)

#define SMALL_RAM_MACHINE (4ULL * 1024ULL * 1024ULL * 1024ULL)

if (total_memory >= SMALL_RAM_MACHINE) {
abd_arena = vmem_create("abd_cache", NULL, 0,
PAGESIZE, vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
BIG_BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
sizeof (void *),
vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
} else {
abd_arena = vmem_create("abd_cache", NULL, 0,
PAGESIZE, vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
131072, VM_SLEEP | VMC_NO_QCACHE);
sizeof (void *),
vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
PAGESIZE, VM_SLEEP | VMC_NO_QCACHE);
}

VERIFY3P(abd_arena, !=, NULL);
Expand All @@ -322,13 +318,15 @@ segkmem_abd_init()

if (total_memory >= SMALL_RAM_MACHINE) {
abd_subpage_arena = vmem_create("abd_subpage_cache", NULL, 0,
sizeof (void *), vmem_alloc_impl, vmem_free_impl,
sizeof (void *),
vmem_alloc_impl, vmem_free_impl,
spl_heap_arena,
BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
} else {
abd_subpage_arena = vmem_create("abd_subpage_cache", NULL, 0,
512, vmem_alloc_impl, vmem_free_impl, abd_arena,
131072, VM_SLEEP | VMC_NO_QCACHE);
sizeof (void *),
vmem_alloc_impl, vmem_free_impl, abd_arena,
PAGESIZE, VM_SLEEP | VMC_NO_QCACHE);
}

VERIFY3P(abd_subpage_arena, !=, NULL);
Expand Down
21 changes: 18 additions & 3 deletions module/os/macos/spl/spl-taskq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2401,7 +2401,9 @@ taskq_create_common(const char *name, int instance, int nthreads, pri_t pri,
{
taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_SLEEP);
#ifdef __APPLE__
uint_t ncpus = max_ncpus - num_ecores;
uint_t ncpus = max_ncpus;
if (!(flags & TASKQ_CREATE_SYNCED))
ncpus = boot_ncpus; /* possibly deflated by num_ecores */
#else
uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus);
#endif
Expand Down Expand Up @@ -2861,9 +2863,22 @@ taskq_create_synced(const char *name, int nthreads, pri_t pri,
flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);

tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
flags | TASKQ_PREPOPULATE);
flags | TASKQ_PREPOPULATE | TASKQ_CREATE_SYNCED);

VERIFY(tq != NULL);
VERIFY(tq->tq_nthreads == nthreads);

/* wait until our minalloc (nthreads) threads are created */
mutex_enter(&tq->tq_lock);
for (int i = 1; tq->tq_nthreads != nthreads; i++) {
printf("SPL: %s:%d: waiting for tq_nthreads (%d)"
" to be nthreads (%d), (target = %d, pass %d)\n",
__func__, __LINE__,
tq->tq_nthreads, tq->tq_nthreads_target, nthreads, i);
cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
}
mutex_exit(&tq->tq_lock);

VERIFY3U(tq->tq_nthreads, ==, nthreads);

/* spawn all syncthreads */
for (int i = 0; i < nthreads; i++) {
Expand Down
6 changes: 4 additions & 2 deletions module/os/macos/zfs/abd_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,15 +401,17 @@ abd_init(void)
* const int cflags = KMF_BUFTAG | KMF_LITE;
* or
* const int cflags = KMC_ARENA_SLAB;
* (the latter tests larger exchanges of memory with the kernel)
*/

int cflags = KMC_ARENA_SLAB;
int cflags = KMF_BUFTAG | KMF_LITE;
// int cflags = KMC_ARENA_SLAB;
#else
int cflags = KMC_NOTOUCH;
#endif

abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size,
ABD_PGSIZE,
sizeof (void *),
NULL, NULL, NULL, NULL, abd_arena, cflags);

wmsum_init(&abd_sums.abdstat_struct_size, 0);
Expand Down

0 comments on commit a72d543

Please sign in to comment.