From 50366c9887b506a576bb5b0cd06d0635c909c261 Mon Sep 17 00:00:00 2001 From: shaan1337 Date: Wed, 24 Nov 2021 23:51:52 +0400 Subject: [PATCH] If we're doing a writeback with WB_SYNC_ALL and there's an active writeback, do a commit to speed up its completion. If we're doing a writeback with WB_SYNC_NONE, add a small delay to make sure we catch any WB_SYNC_ALL waiters due to a race condition in filemap_write_and_wait_range(). Introduce a parameter named zfs_page_writeback_no_sync_delay_us to control the small delay. The default value is 0 since even without the artificial delay, the issue occurs much more rarely now. Signed-off-by: Shaan Nobee Closes #12662 --- module/os/linux/zfs/zfs_vnops_os.c | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index e0dc6ed95747..3ce9e68770da 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -175,6 +175,8 @@ * return (error); // done, report error */ +unsigned int zfs_page_writeback_no_sync_delay_us = 0; + /* ARGSUSED */ int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) @@ -3530,6 +3532,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) zfs_rangelock_exit(lr); if (wbc->sync_mode != WB_SYNC_NONE) { + /* + * Do a commit to speed up the registered commit callback in order to get + * the page out of the writeback state as soon as possible otherwise we may + * need to wait for several seconds until the transaction group closes if the + * currently active writeback was done with WB_SYNC_NONE. + */ + zil_commit(zfsvfs->z_log, zp->z_id); + if (PageWriteback(pp)) wait_on_page_bit(pp, PG_writeback); } @@ -3604,6 +3614,22 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) * performance reasons. 
*/ zil_commit(zfsvfs->z_log, zp->z_id); + } else { + /* + * This is a workaround for an inherent race condition in filemap_write_and_wait_range() + * where a new page writeback with WB_SYNC_NONE (ourself - the current thread) can start after + * __filemap_fdatawrite_range() (the above WB_SYNC_ALL zil_commit() to be more precise) + * but before filemap_fdatawait_range() (wait_on_page_writeback() to be more precise). + * + * By adding a small delay, we ensure that we can catch any waiters. If there are any, + * we immediately do a commit to avoid making them wait for potentially several seconds + * until the transaction group closes. + */ + if (zfs_page_writeback_no_sync_delay_us > 0) + udelay(MIN(zfs_page_writeback_no_sync_delay_us, 10)); + + if (PageWaiters(pp)) + zil_commit(zfsvfs->z_log, zp->z_id); } ZFS_EXIT(zfsvfs); @@ -3997,3 +4023,8 @@ MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); /* END CSTYLED */ #endif + +/* BEGIN CSTYLED */ +ZFS_MODULE_PARAM(zfs, zfs_page_writeback_, no_sync_delay_us, UINT, ZMOD_RW, + "Number of microseconds to delay non-sync page writebacks to catch any concurrent sync page writeback waiters"); +/* END CSTYLED */ \ No newline at end of file