From 3d835dddd777754203f23f805428b9fb1fac87af Mon Sep 17 00:00:00 2001 From: Don Brady Date: Thu, 22 Aug 2024 22:27:45 +0000 Subject: [PATCH] Changed arc evict to prioritize unloaded spas When there are active async flushes, then the eviction thread can focus exclusively on buffers belonging to any spa that is being flushed. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Don Brady --- module/zfs/arc.c | 76 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 5a5e3498bdfc..8f93d5ef441c 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -789,6 +789,8 @@ typedef struct arc_async_flush { list_node_t af_node; } arc_async_flush_t; +static unsigned int arc_async_flush_init_spa_list(uint64_t spa_list[], + unsigned int list_len); /* * Level 2 ARC @@ -3884,9 +3886,20 @@ arc_set_need_free(void) } } +static boolean_t +arc_spa_is_list_member(uint64_t spa_guid, uint64_t spa_list[], + unsigned int spa_cnt) +{ + for (int i = 0; i < spa_cnt; i++) { + if (spa_list[i] == spa_guid) + return (B_TRUE); + } + return (B_FALSE); +} + static uint64_t arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, - uint64_t spa, uint64_t bytes) + uint64_t bytes, uint64_t spa_list[], unsigned int spa_cnt) { multilist_sublist_t *mls; uint64_t bytes_evicted = 0, real_evicted = 0; @@ -3928,8 +3941,13 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, if (hdr->b_spa == 0) continue; - /* we're only interested in evicting buffers of a certain spa */ - if (spa != 0 && hdr->b_spa != spa) { + /* + * Check if we're only interested in evicting buffers from + * a specific list of spas. This would typically be from + * spas that are being unloaded. 
+ */ + if (spa_cnt > 0 && + !arc_spa_is_list_member(hdr->b_spa, spa_list, spa_cnt)) { ARCSTAT_BUMP(arcstat_evict_skip); continue; } @@ -4065,8 +4083,8 @@ arc_state_free_markers(arc_buf_hdr_t **markers, int count) * the given arc state; which is used by arc_flush(). */ static uint64_t -arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa, - uint64_t bytes) +arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t bytes, + uint64_t spa_list[], unsigned int spa_cnt) { uint64_t total_evicted = 0; multilist_t *ml = &state->arcs_list[type]; @@ -4121,7 +4139,8 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa, break; bytes_evicted = arc_evict_state_impl(ml, sublist_idx, - markers[sublist_idx], spa, bytes_remaining); + markers[sublist_idx], bytes_remaining, spa_list, + spa_cnt); scan_evicted += bytes_evicted; total_evicted += bytes_evicted; @@ -4186,9 +4205,11 @@ arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type, boolean_t retry) { uint64_t evicted = 0; + uint64_t spa_list[1] = {spa}; while (zfs_refcount_count(&state->arcs_esize[type]) != 0) { - evicted += arc_evict_state(state, type, spa, ARC_EVICT_ALL); + evicted += arc_evict_state(state, type, ARC_EVICT_ALL, + spa_list, spa == 0 ? 0 : 1); if (!retry) break; @@ -4212,7 +4233,15 @@ arc_evict_impl(arc_state_t *state, arc_buf_contents_t type, int64_t bytes) if (bytes > 0 && zfs_refcount_count(&state->arcs_esize[type]) > 0) { delta = MIN(zfs_refcount_count(&state->arcs_esize[type]), bytes); - return (arc_evict_state(state, type, 0, delta)); + /* + * Create a list of guids from any active ARC async flushes. + * The underlying arc_evict_state() function will target + * only spa guids from this list when it is not empty. 
+ */ + uint64_t spa_list[16]; + unsigned int spa_cnt = + arc_async_flush_init_spa_list(spa_list, 16); + return (arc_evict_state(state, type, delta, spa_list, spa_cnt)); } return (0); @@ -4516,8 +4545,8 @@ arc_flush_async(spa_t *spa) * unlikely, but if we couldn't dispatch then use an inline flush */ if (tid == TASKQID_INVALID) { - arc_async_flush_remove(spa_guid, TASKQID_INVALID); arc_flush_impl(spa_guid, B_FALSE); + arc_async_flush_remove(spa_guid, TASKQID_INVALID); } } @@ -4539,6 +4568,30 @@ arc_async_flush_guid_inuse(uint64_t spa_guid) return (B_FALSE); } +/* + * Initialize a list of spa guids that are being flushed. + * + * Used by arc_evict_state() to target headers belonging to spas on this list. + */ +static unsigned int +arc_async_flush_init_spa_list(uint64_t spa_list[], unsigned int list_len) +{ + unsigned int init_cnt = 0; + + /* + * Iterate until the end of the list or array slots are full. + */ + mutex_enter(&arc_async_flush_lock); + for (arc_async_flush_t *af = list_head(&arc_async_flush_list); + init_cnt < list_len && af != NULL; + af = list_next(&arc_async_flush_list, af)) { + spa_list[init_cnt++] = af->af_spa_guid; + } + mutex_exit(&arc_async_flush_lock); + + return (init_cnt); +} + uint64_t arc_reduce_target_size(uint64_t to_free) { @@ -9914,8 +9967,7 @@ l2arc_device_teardown(void *arg) (u_longlong_t)elaspsed); } - if (rva->rva_task_id != TASKQID_INVALID) - arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id); + arc_async_flush_remove(rva->rva_spa_gid, rva->rva_task_id); kmem_free(rva, sizeof (remove_vdev_args_t)); } @@ -9990,8 +10042,8 @@ l2arc_remove_vdev(vdev_t *vd) mutex_exit(&arc_async_flush_lock); if (tid == TASKQID_INVALID) { - arc_async_flush_remove(spa_guid, TASKQID_INVALID); l2arc_device_teardown(rva); + arc_async_flush_remove(spa_guid, TASKQID_INVALID); } }