diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 72d511b0680f..c5dd0c242ddc 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -973,12 +973,10 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
 }
 
 static rl_t *
-ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
+ztest_range_lock(rl_t *rl, ztest_ds_t *zd, uint64_t object, uint64_t offset,
     uint64_t size, rl_type_t type)
 {
 	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
 	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
-	rl_t *rl;
 
-	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
 	rl->rl_object = object;
@@ -1389,7 +1387,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 	dmu_tx_t *tx;
 	dmu_buf_t *db;
 	arc_buf_t *abuf = NULL;
-	rl_t *rl;
+	rl_t rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
@@ -1413,7 +1411,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		bt = NULL;
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
-	rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
+	ztest_range_lock(&rl, zd, lr->lr_foid, offset, length, RL_WRITER);
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
@@ -1438,7 +1436,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		if (abuf != NULL)
 			dmu_return_arcbuf(abuf);
 		dmu_buf_rele(db, FTAG);
-		ztest_range_unlock(rl);
+		ztest_range_unlock(&rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
@@ -1495,7 +1493,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 
 	dmu_tx_commit(tx);
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
@@ -1507,13 +1505,13 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
-	rl_t *rl;
+	rl_t rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
-	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
+	ztest_range_lock(&rl, zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
 	    RL_WRITER);
 
 	tx = dmu_tx_create(os);
@@ -1522,7 +1520,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
-		ztest_range_unlock(rl);
+		ztest_range_unlock(&rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
@@ -1534,7 +1532,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 
 	dmu_tx_commit(tx);
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
@@ -1670,6 +1668,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	zgd_t *zgd;
+	rl_t rl;
+	int error;
 
 	ztest_object_lock(zd, object, RL_READER);
 
@@ -1694,9 +1694,10 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
 	zgd->zgd_zilog = zd->zd_zilog;
 	zgd->zgd_private = zd;
+	zgd->zgd_rl = &rl;
 
 	if (buf != NULL) {	/* immediate write */
-		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+		ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_read(os, object, offset, size, buf,
@@ -1711,7 +1712,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 			offset = 0;
 		}
 
-		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+		ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_buf_hold(os, object, offset, zgd, &db,
@@ -1953,12 +1954,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
-	rl_t *rl;
+	rl_t rl;
 
 	txg_wait_synced(dmu_objset_pool(os), 0);
 
 	ztest_object_lock(zd, object, RL_READER);
-	rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
+	ztest_range_lock(&rl, zd, object, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(os);
@@ -1974,7 +1975,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 		(void) dmu_free_long_range(os, object, offset, size);
 	}
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, object);
 }
diff --git a/config/kernel.m4 b/config/kernel.m4
index 7f07c90638c4..f6c145715783 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -319,28 +319,9 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [
 			[Define to 1 if licensed under the GPL])
 	])
 
-	ZFS_AC_KERNEL_CONFIG_PREEMPT
 	ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
 ])
 
-dnl #
-dnl # Check CONFIG_PREEMPT
-dnl #
-dnl # Premptible kernels will be supported in the future.  But at the
-dnl # moment there are a few places in the code which need to be updated
-dnl # to accomidate them.  Until that work occurs we should detect this
-dnl # at configure time and fail with a sensible message.  Otherwise,
-dnl # people will be able to build successfully, however they will have
-dnl # stability problems.  See https://github.com/zfsonlinux/zfs/issues/83
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CONFIG_PREEMPT], [
-
-	ZFS_LINUX_CONFIG([PREEMPT],
-		AC_MSG_ERROR([
-		*** Kernel built with CONFIG_PREEMPT which is not supported.
-		*** You must rebuild your kernel without this option.]), [])
-])
-
 dnl #
 dnl # Check CONFIG_DEBUG_LOCK_ALLOC
 dnl #
diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h
index da18b1f18efc..85dc16acd84a 100644
--- a/include/sys/zfs_rlock.h
+++ b/include/sys/zfs_rlock.h
@@ -63,7 +63,8 @@ typedef struct rl {
  * is converted to WRITER that specified to lock from the start of the
  * end of file.  zfs_range_lock() returns the range lock structure.
  */
-rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
+rl_t *zfs_range_lock(rl_t *rl, znode_t *zp, uint64_t off, uint64_t len,
+    rl_type_t type);
 
 /*
  * Unlock range and destroy range lock structure.
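The zfs_rlock.h change above inverts ownership of the rl_t: the caller now supplies the storage (typically on its stack) and zfs_range_lock() only initializes and inserts it, instead of kmem_alloc()ing one per call. A minimal before/after sketch of the calling convention follows; the helper names are hypothetical, and it assumes the unlock path no longer frees the structure (consistent with the updated usage comment in zfs_rlock.c below, though those free-path hunks are not shown here):

/*
 * Sketch only, not part of the patch: old heap-owned convention
 * versus the new caller-owned convention.  update_range_old() and
 * update_range_new() are illustrative helpers.
 */
static void
update_range_old(znode_t *zp, uint64_t off, uint64_t len)
{
	rl_t *rl;

	rl = zfs_range_lock(zp, off, len, RL_WRITER);	/* allocates */
	/* ... modify the locked range ... */
	zfs_range_unlock(rl);				/* frees */
}

static void
update_range_new(znode_t *zp, uint64_t off, uint64_t len)
{
	rl_t rl;			/* caller-owned, stack storage */

	zfs_range_lock(&rl, zp, off, len, RL_WRITER);
	/* ... modify the locked range ... */
	zfs_range_unlock(&rl);		/* unlocks; storage stays valid */
}

A side benefit visible in the ztest hunks: early-return error paths such as the ENOSPC cases no longer pair an allocation with a free, so they cannot leak the lock structure.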
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index e4ecfea19c7d..360dfac848c6 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -65,6 +65,7 @@
 #include
 #include
 #ifdef _KERNEL
+#include
 #include
 #include
 #include
@@ -1413,7 +1414,17 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
 uint64_t
 fm_ena_generate(uint64_t timestamp, uchar_t format)
 {
-	return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
+	processorid_t cpuid;
+
+#ifdef _KERNEL
+	preempt_disable();
+	cpuid = getcpuid();
+	preempt_enable();
+#else
+	cpuid = getcpuid();
+#endif
+
+	return (fm_ena_generate_cpu(timestamp, cpuid, format));
 }
 
 uint64_t
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index 6e64adf9376e..54d13fd25d53 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -218,9 +218,17 @@
 uint64_t
 txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
 {
 	tx_state_t *tx = &dp->dp_tx;
-	tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID];
+	tx_cpu_t *tc;
 	uint64_t txg;
 
+#ifdef _KERNEL
+	preempt_disable();
+	tc = &tx->tx_cpu[CPU_SEQID];
+	preempt_enable();
+#else
+	tc = &tx->tx_cpu[CPU_SEQID];
+#endif /* _KERNEL */
+
 	mutex_enter(&tc->tc_lock);
 	txg = tx->tx_open_txg;
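Both hunks above follow the same pattern, which is what CONFIG_PREEMPT support hinges on: reading a CPU identifier (getcpuid() in fm.c, the CPU_SEQID index in txg.c) is only well defined while preemption is disabled, because a preemptible thread can migrate to another CPU at any instruction. With CONFIG_DEBUG_PREEMPT, calling smp_processor_id() from preemptible context even triggers a "using smp_processor_id() in preemptible" warning. Crucially, both call sites only need *a* CPU ID to spread work across per-CPU slots, not a value that stays current, so it is harmless that the ID may be stale the moment preempt_enable() returns. A sketch of the pattern in plain Linux kernel terms (stable_cpu_hint() is a hypothetical name):

#include <linux/preempt.h>	/* preempt_disable()/preempt_enable() */
#include <linux/smp.h>		/* smp_processor_id() */

/*
 * Sketch only, not part of the patch: read a CPU id safely on a
 * preemptible kernel.  The value is a hint for picking a per-CPU
 * slot; it may be stale after preempt_enable(), which is fine
 * because any slot works.
 */
static int
stable_cpu_hint(void)
{
	int cpu;

	preempt_disable();		/* no migration in this window */
	cpu = smp_processor_id();
	preempt_enable();		/* cpu may be stale from here on */

	return (cpu);
}

This is also why txg_hold_open() can re-enable preemption before taking tc->tc_lock: every tx_cpu_t is a valid choice and the per-pool array never moves, so a stale index costs at worst a little extra lock contention, never correctness.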
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c
index f3ada1706419..6e9afc0a1bdf 100644
--- a/module/zfs/zfs_rlock.c
+++ b/module/zfs/zfs_rlock.c
@@ -31,9 +31,9 @@
  * Interface
  * ---------
  * Defined in zfs_rlock.h but essentially:
- *	rl = zfs_range_lock(zp, off, len, lock_type);
- *	zfs_range_unlock(rl);
- *	zfs_range_reduce(rl, off, len);
+ *	zfs_range_lock(&rl, zp, off, len, lock_type);
+ *	zfs_range_unlock(&rl);
+ *	zfs_range_reduce(&rl, off, len);
 *
 * AVL tree
 * --------
@@ -420,13 +420,11 @@ zfs_range_lock_reader(znode_t *zp, rl_t *new)
  * previously locked as RL_WRITER).
  */
 rl_t *
-zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
+zfs_range_lock(rl_t *new, znode_t *zp, uint64_t off, uint64_t len,
+    rl_type_t type)
 {
-	rl_t *new;
-
 	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-	new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
 	new->r_zp = zp;
 	new->r_off = off;
 	if (len + off < off)	/* overflow */
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 2da5fec8672c..039269a0145b 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -370,7 +370,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	objset_t *os;
 	ssize_t n, nbytes;
 	int error = 0;
-	rl_t *rl;
+	rl_t rl;
 #ifdef HAVE_UIO_ZEROCOPY
 	xuio_t *xuio = NULL;
 #endif /* HAVE_UIO_ZEROCOPY */
@@ -418,7 +418,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	/*
 	 * Lock the range against changes.
 	 */
-	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
+	zfs_range_lock(&rl, zp, uio->uio_loffset, uio->uio_resid, RL_READER);
 
 	/*
 	 * If we are reading past end-of-file we can skip
@@ -482,7 +482,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		n -= nbytes;
 	}
 out:
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	ZFS_ACCESSTIME_STAMP(zsb, zp);
 	zfs_inode_update(zp);
@@ -524,7 +524,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	zilog_t *zilog;
 	offset_t woff;
 	ssize_t n, nbytes;
-	rl_t *rl;
+	rl_t rl;
 	int max_blksz = zsb->z_max_blksz;
 	int error = 0;
 	arc_buf_t *abuf;
@@ -608,9 +608,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
 		 */
-		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
-		woff = rl->r_off;
-		if (rl->r_len == UINT64_MAX) {
+		zfs_range_lock(&rl, zp, 0, n, RL_APPEND);
+		woff = rl.r_off;
+		if (rl.r_len == UINT64_MAX) {
 			/*
 			 * We overlocked the file because this write will cause
 			 * the file block size to increase.
@@ -625,11 +625,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		 * this write, then this range lock will lock the entire file
 		 * so that we can re-write the block safely.
 		 */
-		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
+		zfs_range_lock(&rl, zp, woff, n, RL_WRITER);
 	}
 
 	if (woff >= limit) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		ZFS_EXIT(zsb);
 		return (EFBIG);
 	}
@@ -719,7 +719,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		 * on the first iteration since zfs_range_reduce() will
 		 * shrink down r_len to the appropriate size.
 		 */
-		if (rl->r_len == UINT64_MAX) {
+		if (rl.r_len == UINT64_MAX) {
 			uint64_t new_blksz;
 
 			if (zp->z_blksz > max_blksz) {
@@ -729,7 +729,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 				new_blksz = MIN(end_size, max_blksz);
 			}
 			zfs_grow_blocksize(zp, new_blksz, tx);
-			zfs_range_reduce(rl, woff, n);
+			zfs_range_reduce(&rl, woff, n);
 		}
 
 		/*
@@ -842,7 +842,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 			uio_prefaultpages(MIN(n, max_blksz), uio);
 	}
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	/*
 	 * If we're in replay mode, or we made no progress, return error.
@@ -946,7 +946,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	 * we don't have to write the data twice.
 	 */
 	if (buf != NULL) {	/* immediate write */
-		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
+		zfs_range_lock(zgd->zgd_rl, zp, offset, size, RL_READER);
 		/* test for truncation needs to be done while range locked */
 		if (offset >= zp->z_size) {
 			error = ENOENT;
@@ -967,7 +967,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 			size = zp->z_blksz;
 			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
 			offset -= blkoff;
-			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
+			zfs_range_lock(zgd->zgd_rl, zp, offset, size,
 			    RL_READER);
 			if (zp->z_blksz == size)
 				break;
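The zfs_write() hunks above thread the stack-allocated lock through the append path; the flow is easier to see condensed into one place. A sketch (append_locked() is a hypothetical wrapper; transaction setup and the write itself are elided):

/*
 * Sketch only, not part of the patch: the RL_APPEND flow from
 * zfs_write() with the caller-allocated rl_t.
 */
static void
append_locked(znode_t *zp, uint64_t n)
{
	rl_t rl;
	offset_t woff;

	/* Ask for an append lock; the lock code resolves end-of-file. */
	zfs_range_lock(&rl, zp, 0, n, RL_APPEND);
	woff = rl.r_off;	/* actual offset this append will use */

	if (rl.r_len == UINT64_MAX) {
		/*
		 * The whole file was overlocked because the write may
		 * grow the block size.  Once the new block size is
		 * established, shrink the lock to just this range.
		 */
		zfs_range_reduce(&rl, woff, n);
	}

	/* ... write n bytes at woff under the (reduced) lock ... */

	zfs_range_unlock(&rl);
}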
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 3a6872f3e45e..e363839c9dc1 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1158,20 +1158,20 @@ zfs_extend(znode_t *zp, uint64_t end)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 	uint64_t newblksz;
 	int error;
 
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
-	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+	zfs_range_lock(&rl, zp, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end <= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 top:
@@ -1202,7 +1202,7 @@ zfs_extend(znode_t *zp, uint64_t end)
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
 
@@ -1214,7 +1214,7 @@ zfs_extend(znode_t *zp, uint64_t end)
 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
 	    &zp->z_size, sizeof (zp->z_size), tx));
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	dmu_tx_commit(tx);
 
@@ -1235,19 +1235,19 @@ static int
 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
-	rl_t *rl;
+	rl_t rl;
 	int error;
 
 	/*
 	 * Lock the range being freed.
 	 */
-	rl = zfs_range_lock(zp, off, len, RL_WRITER);
+	zfs_range_lock(&rl, zp, off, len, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (off >= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 
@@ -1256,7 +1256,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 	error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	return (error);
 }
@@ -1275,7 +1275,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 	int error;
 	sa_bulk_attr_t bulk[2];
 	int count = 0;
@@ -1283,19 +1283,19 @@ zfs_trunc(znode_t *zp, uint64_t end)
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
-	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+	zfs_range_lock(&rl, zp, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end >= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 
 	error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
 	if (error) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
 top:
@@ -1310,7 +1310,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
 
@@ -1327,7 +1327,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
 
 	dmu_tx_commit(tx);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	return (0);
 }
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 125d58de918b..307ca2fb62ec 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -537,7 +537,7 @@ zvol_write(void *arg)
 	uint64_t size = blk_rq_bytes(req);
 	int error = 0;
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 
 	if (req->cmd_flags & VDEV_REQ_FLUSH)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
@@ -550,7 +550,7 @@ zvol_write(void *arg)
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(zv->zv_objset);
 	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);
@@ -559,7 +559,7 @@ zvol_write(void *arg)
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		blk_end_request(req, -error, size);
 		return;
 	}
@@ -570,7 +570,7 @@ zvol_write(void *arg)
 	    req->cmd_flags & VDEV_REQ_FUA);
 
 	dmu_tx_commit(tx);
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	if ((req->cmd_flags & VDEV_REQ_FUA) ||
 	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
@@ -589,7 +589,7 @@ zvol_discard(void *arg)
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error;
-	rl_t *rl;
+	rl_t rl;
 
 	if (offset + size > zv->zv_volsize) {
 		blk_end_request(req, -EIO, size);
@@ -601,7 +601,7 @@ zvol_discard(void *arg)
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_WRITER);
 
 	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
 	    offset, size);
@@ -609,7 +609,7 @@ zvol_discard(void *arg)
 	 * TODO: maybe we should add the operation to the log.
 	 */
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	blk_end_request(req, -error, size);
 }
@@ -630,18 +630,18 @@ zvol_read(void *arg)
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error;
-	rl_t *rl;
+	rl_t rl;
 
 	if (size == 0) {
 		blk_end_request(req, 0, size);
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_READER);
 
 	error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	/* convert checksum errors into IO errors */
 	if (error == ECKSUM)
@@ -744,6 +744,7 @@ zvol_get_done(zgd_t *zgd, int error)
 	if (error == 0 && zgd->zgd_bp)
 		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
 
+	kmem_free(zgd->zgd_rl, sizeof (rl_t));
 	kmem_free(zgd, sizeof (zgd_t));
 }
 
@@ -766,7 +767,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 
 	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_zilog = zv->zv_zilog;
-	zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
+	zgd->zgd_rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+	zfs_range_lock(zgd->zgd_rl, &zv->zv_znode, offset, size, RL_READER);
 
 	/*
 	 * Write records come in two flavors: immediate and indirect.
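zvol_get_data() is the one converted caller here that cannot keep the rl_t on its stack: for indirect writes the zgd_t, and with it the range lock, outlives the function, because the lock is only dropped when the zio completes and zvol_get_done() runs. Hence the explicit kmem_alloc() above and the matching kmem_free() added to zvol_get_done(). A sketch of the pairing (the helper names are hypothetical; the real zvol_get_done() also releases the dbuf and the zgd itself):

/*
 * Sketch only, not part of the patch: why zgd_rl is heap-allocated
 * in zvol_get_data().  The lock must stay valid until the zio
 * completion callback fires.
 */
static void
zgd_rl_acquire(zgd_t *zgd, znode_t *zp, uint64_t offset, uint64_t size)
{
	zgd->zgd_rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
	zfs_range_lock(zgd->zgd_rl, zp, offset, size, RL_READER);
}

static void
zgd_rl_release(zgd_t *zgd)
{
	/* Runs from the zio completion path, after the caller returned. */
	zfs_range_unlock(zgd->zgd_rl);
	kmem_free(zgd->zgd_rl, sizeof (rl_t));
}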