Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preemption Support #674

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -973,12 +973,11 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
}

static rl_t *
ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
ztest_range_lock(rl_t *rl, ztest_ds_t *zd, uint64_t object, uint64_t offset,
uint64_t size, rl_type_t type)
{
uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
rl_t *rl;

rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
rl->rl_object = object;
Expand Down Expand Up @@ -1389,7 +1388,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
dmu_tx_t *tx;
dmu_buf_t *db;
arc_buf_t *abuf = NULL;
rl_t *rl;
rl_t rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
Expand All @@ -1413,7 +1412,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
bt = NULL;

ztest_object_lock(zd, lr->lr_foid, RL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
ztest_range_lock(&rl, zd, lr->lr_foid, offset, length, RL_WRITER);

VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));

Expand All @@ -1438,7 +1437,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
if (abuf != NULL)
dmu_return_arcbuf(abuf);
dmu_buf_rele(db, FTAG);
ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand Down Expand Up @@ -1495,7 +1494,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand All @@ -1507,13 +1506,13 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
rl_t rl;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

ztest_object_lock(zd, lr->lr_foid, RL_READER);
rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
ztest_range_lock(&rl, zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
RL_WRITER);

tx = dmu_tx_create(os);
Expand All @@ -1522,7 +1521,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0) {
ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);
return (ENOSPC);
}
Expand All @@ -1534,7 +1533,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)

dmu_tx_commit(tx);

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, lr->lr_foid);

return (0);
Expand Down Expand Up @@ -1670,6 +1669,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
dmu_object_info_t doi;
dmu_buf_t *db;
zgd_t *zgd;
rl_t rl;

int error;

ztest_object_lock(zd, object, RL_READER);
Expand All @@ -1694,9 +1695,10 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
zgd->zgd_zilog = zd->zd_zilog;
zgd->zgd_private = zd;
zgd->zgd_rl = &rl;

if (buf != NULL) { /* immediate write */
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
RL_READER);

error = dmu_read(os, object, offset, size, buf,
Expand All @@ -1711,7 +1713,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
offset = 0;
}

zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
RL_READER);

error = dmu_buf_hold(os, object, offset, zgd, &db,
Expand Down Expand Up @@ -1953,12 +1955,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
objset_t *os = zd->zd_os;
dmu_tx_t *tx;
uint64_t txg;
rl_t *rl;
rl_t rl;

txg_wait_synced(dmu_objset_pool(os), 0);

ztest_object_lock(zd, object, RL_READER);
rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
ztest_range_lock(&rl, zd, object, offset, size, RL_WRITER);

tx = dmu_tx_create(os);

Expand All @@ -1974,7 +1976,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
(void) dmu_free_long_range(os, object, offset, size);
}

ztest_range_unlock(rl);
ztest_range_unlock(&rl);
ztest_object_unlock(zd, object);
}

Expand Down
19 changes: 0 additions & 19 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -319,28 +319,9 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [
[Define to 1 if licensed under the GPL])
])

ZFS_AC_KERNEL_CONFIG_PREEMPT
ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
])

dnl #
dnl # Check CONFIG_PREEMPT
dnl #
dnl # Preemptible kernels will be supported in the future. But at the
dnl # moment there are a few places in the code which need to be updated
dnl # to accommodate them. Until that work occurs we should detect this
dnl # at configure time and fail with a sensible message. Otherwise,
dnl # people will be able to build successfully, however they will have
dnl # stability problems. See https://github.com/zfsonlinux/zfs/issues/83
dnl #
AC_DEFUN([ZFS_AC_KERNEL_CONFIG_PREEMPT], [

ZFS_LINUX_CONFIG([PREEMPT],
AC_MSG_ERROR([
*** Kernel built with CONFIG_PREEMPT which is not supported.
*** You must rebuild your kernel without this option.]), [])
])

dnl #
dnl # Check CONFIG_DEBUG_LOCK_ALLOC
dnl #
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zfs_rlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ typedef struct rl {
 * is converted to a WRITER lock that starts at the current end of
 * file. zfs_range_lock() returns the range lock structure.
*/
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
rl_t *zfs_range_lock(rl_t *rl, znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);

/*
* Unlock range and destroy range lock structure.
Expand Down
13 changes: 12 additions & 1 deletion module/zfs/fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <linux/preempt.h>
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/cpuvar.h>
Expand Down Expand Up @@ -1413,7 +1414,17 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
processorid_t cpuid;

#ifdef _KERNEL
preempt_disable();
cpuid = getcpuid();
preempt_enable();
#else
cpuid = getcpuid();
#endif

return (fm_ena_generate_cpu(timestamp, cpuid, format));
}

uint64_t
Expand Down
10 changes: 9 additions & 1 deletion module/zfs/txg.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,17 @@ uint64_t
txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
{
tx_state_t *tx = &dp->dp_tx;
tx_cpu_t *tc = &tx->tx_cpu[CPU_SEQID];
tx_cpu_t *tc;
uint64_t txg;

#ifdef _KERNEL
preempt_disable();
tc = &tx->tx_cpu[CPU_SEQID];
preempt_enable();
#else
tc = &tx->tx_cpu[CPU_SEQID];
#endif /* _KERNEL */

mutex_enter(&tc->tc_lock);

txg = tx->tx_open_txg;
Expand Down
10 changes: 4 additions & 6 deletions module/zfs/zfs_rlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
* Interface
* ---------
* Defined in zfs_rlock.h but essentially:
* rl = zfs_range_lock(zp, off, len, lock_type);
* zfs_range_unlock(rl);
* zfs_range_reduce(rl, off, len);
* zfs_range_lock(&rl, zp, off, len, lock_type);
* zfs_range_unlock(&rl);
* zfs_range_reduce(&rl, off, len);
*
* AVL tree
* --------
Expand Down Expand Up @@ -420,13 +420,11 @@ zfs_range_lock_reader(znode_t *zp, rl_t *new)
* previously locked as RL_WRITER).
*/
rl_t *
zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
zfs_range_lock(rl_t *new, znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
{
rl_t *new;

ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
new->r_zp = zp;
new->r_off = off;
if (len + off < off) /* overflow */
Expand Down
28 changes: 14 additions & 14 deletions module/zfs/zfs_vnops.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
objset_t *os;
ssize_t n, nbytes;
int error = 0;
rl_t *rl;
rl_t rl;
#ifdef HAVE_UIO_ZEROCOPY
xuio_t *xuio = NULL;
#endif /* HAVE_UIO_ZEROCOPY */
Expand Down Expand Up @@ -418,7 +418,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
/*
* Lock the range against changes.
*/
rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
zfs_range_lock(&rl, zp, uio->uio_loffset, uio->uio_resid, RL_READER);

/*
* If we are reading past end-of-file we can skip
Expand Down Expand Up @@ -482,7 +482,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
n -= nbytes;
}
out:
zfs_range_unlock(rl);
zfs_range_unlock(&rl);

ZFS_ACCESSTIME_STAMP(zsb, zp);
zfs_inode_update(zp);
Expand Down Expand Up @@ -524,7 +524,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
zilog_t *zilog;
offset_t woff;
ssize_t n, nbytes;
rl_t *rl;
rl_t rl;
int max_blksz = zsb->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
Expand Down Expand Up @@ -608,9 +608,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* Obtain an appending range lock to guarantee file append
* semantics. We reset the write offset once we have the lock.
*/
rl = zfs_range_lock(zp, 0, n, RL_APPEND);
woff = rl->r_off;
if (rl->r_len == UINT64_MAX) {
zfs_range_lock(&rl, zp, 0, n, RL_APPEND);
woff = rl.r_off;
if (rl.r_len == UINT64_MAX) {
/*
* We overlocked the file because this write will cause
* the file block size to increase.
Expand All @@ -625,11 +625,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* this write, then this range lock will lock the entire file
* so that we can re-write the block safely.
*/
rl = zfs_range_lock(zp, woff, n, RL_WRITER);
zfs_range_lock(&rl, zp, woff, n, RL_WRITER);
}

if (woff >= limit) {
zfs_range_unlock(rl);
zfs_range_unlock(&rl);
ZFS_EXIT(zsb);
return (EFBIG);
}
Expand Down Expand Up @@ -719,7 +719,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* on the first iteration since zfs_range_reduce() will
* shrink down r_len to the appropriate size.
*/
if (rl->r_len == UINT64_MAX) {
if (rl.r_len == UINT64_MAX) {
uint64_t new_blksz;

if (zp->z_blksz > max_blksz) {
Expand All @@ -729,7 +729,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
new_blksz = MIN(end_size, max_blksz);
}
zfs_grow_blocksize(zp, new_blksz, tx);
zfs_range_reduce(rl, woff, n);
zfs_range_reduce(&rl, woff, n);
}

/*
Expand Down Expand Up @@ -842,7 +842,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
uio_prefaultpages(MIN(n, max_blksz), uio);
}

zfs_range_unlock(rl);
zfs_range_unlock(&rl);

/*
* If we're in replay mode, or we made no progress, return error.
Expand Down Expand Up @@ -946,7 +946,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
* we don't have to write the data twice.
*/
if (buf != NULL) { /* immediate write */
zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
zfs_range_lock(zgd->zgd_rl, zp, offset, size, RL_READER);
/* test for truncation needs to be done while range locked */
if (offset >= zp->z_size) {
error = ENOENT;
Expand All @@ -967,7 +967,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
size = zp->z_blksz;
blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
offset -= blkoff;
zgd->zgd_rl = zfs_range_lock(zp, offset, size,
zfs_range_lock(zgd->zgd_rl, zp, offset, size,
RL_READER);
if (zp->z_blksz == size)
break;
Expand Down
Loading