Skip to content

Commit

Permalink
Merge branch 'kmem-rework'
Browse files Browse the repository at this point in the history
The core motivation behind these changes is to minimize the
memory management differences between ZFS on Linux and other
platforms.  This simplifies the process of porting changes to
Linux from other platforms.  This is good for code quality
and is expected to reduce the number of defects accidentally
introduced due to porting.  The following key Linux specific
changes have been reverted.

* KM_PUSHPAGE changed back to KM_SLEEP.  All contexts where
  it is unsafe to perform IO have been marked with PF_FSTRANS.
  This context specific mechanism is now used exclusively
  and the KM_PUSHPAGE mechanism has been retired.

* The KM_NODEBUG flag has been retired.  Allocations larger
  than 32K should use vmem_alloc()/vmem_free().  Depending
  on the size of the allocation either kmalloc() or vmalloc()
  will be used internally, but no warning will be printed.

* Pre-allocated vdev IO buffers and the dedicated SA spill
  block cache have been retired.  It is now safe and reliable
  to allocate buffers of the needed size without fear of
  deadlocking.  This reduces our memory footprint and paves
  the way for larger block sizes.

Depends on openzfs/spl#414.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes openzfs#2918
  • Loading branch information
behlendorf committed Jan 16, 2015
2 parents d958324 + 81971b1 commit 6e9710f
Show file tree
Hide file tree
Showing 74 changed files with 369 additions and 447 deletions.
12 changes: 6 additions & 6 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,13 +342,13 @@ boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
} \
_NOTE(CONSTCOND) } while (0)

#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)

#define DBUF_VERIFY(db) dbuf_verify(db)
Expand Down
2 changes: 1 addition & 1 deletion include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); \
char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
dsl_dataset_name(ds, __ds_name); \
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
kmem_free(__ds_name, MAXNAMELEN); \
Expand Down
2 changes: 1 addition & 1 deletion include/sys/dsl_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
#define dprintf_dd(dd, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \
KM_PUSHPAGE); \
KM_SLEEP); \
dsl_dir_name(dd, __ds_name); \
dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \
Expand Down
2 changes: 0 additions & 2 deletions include/sys/sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
boolean_t sa_enabled(objset_t *);
void sa_cache_init(void);
void sa_cache_fini(void);
void *sa_spill_alloc(int);
void sa_spill_free(void *);
int sa_set_sa_object(objset_t *, uint64_t);
int sa_hdrsize(void *);
void sa_handle_lock(sa_handle_t *);
Expand Down
13 changes: 7 additions & 6 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <sys/avl.h>
#include <sys/zfs_context.h>
#include <sys/kstat.h>
#include <sys/nvpair.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
Expand Down Expand Up @@ -876,12 +877,12 @@ extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);

#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \
snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
Expand Down
7 changes: 0 additions & 7 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ extern "C" {
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_io vdev_io_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;

Expand Down Expand Up @@ -117,16 +116,10 @@ struct vdev_queue {
uint64_t vq_last_offset;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
hrtime_t vq_io_delta_ts;
list_t vq_io_list;
zio_t vq_io_search; /* used as local for stack reduction */
kmutex_t vq_lock;
};

struct vdev_io {
char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */
list_node_t vi_node;
};

/*
* Virtual device descriptor
*/
Expand Down
11 changes: 8 additions & 3 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
#include <sys/taskq.h>
#include <sys/buf.h>
#include <sys/param.h>
Expand Down Expand Up @@ -391,7 +393,6 @@ extern void kstat_set_raw_ops(kstat_t *ksp,
#define KM_SLEEP UMEM_NOFAIL
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
#define KM_NODEBUG 0x0
#define KMC_NODEBUG UMC_NODEBUG
#define KMC_KMEM 0x0
#define KMC_VMEM 0x0
Expand Down Expand Up @@ -447,7 +448,6 @@ typedef struct taskq_ent {

#define TQ_SLEEP KM_SLEEP /* Can block for memory */
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
#define TQ_PUSHPAGE KM_PUSHPAGE /* Cannot perform I/O */
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
#define TQ_FRONT 0x08 /* Queue in front */

Expand Down Expand Up @@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *);
(void) nanosleep(&ts, NULL); \
} while (0)

#endif /* _KERNEL */
typedef int fstrans_cookie_t;

extern fstrans_cookie_t spl_fstrans_mark(void);
extern void spl_fstrans_unmark(fstrans_cookie_t);
extern int spl_fstrans_check(void);

#endif /* _KERNEL */
#endif /* _SYS_ZFS_CONTEXT_H */
2 changes: 0 additions & 2 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,8 +522,6 @@ extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
extern void *zio_vdev_alloc(void);
extern void zio_vdev_free(void *buf);

extern void zio_resubmit_stage_async(void *);

Expand Down
17 changes: 17 additions & 0 deletions lib/libzpool/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
return (0);
}

fstrans_cookie_t
spl_fstrans_mark(void)
{
return ((fstrans_cookie_t) 0);
}

void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
}

int
spl_fstrans_check(void)
{
return (0);
}
4 changes: 2 additions & 2 deletions module/nvpair/nvpair_alloc_spl.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@
static void *
nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size)
{
return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG));
return (kmem_alloc(size, KM_SLEEP));
}

static void *
nv_alloc_pushpage_spl(nv_alloc_t *nva, size_t size)
{
return (kmem_alloc(size, KM_PUSHPAGE | KM_NODEBUG));
return (kmem_alloc(size, KM_PUSHPAGE));
}

static void *
Expand Down
2 changes: 1 addition & 1 deletion module/zcommon/zprop_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ zprop_iter_common(zprop_func func, void *cb, boolean_t show_all,
size = num_props * sizeof (zprop_desc_t *);

#if defined(_KERNEL)
order = kmem_alloc(size, KM_PUSHPAGE);
order = kmem_alloc(size, KM_SLEEP);
#else
if ((order = malloc(size)) == NULL)
return (ZPROP_CONT);
Expand Down
16 changes: 8 additions & 8 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
return;
}
buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
KM_PUSHPAGE);
KM_SLEEP);
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
buf->b_hdr->b_freeze_cksum);
mutex_exit(&buf->b_hdr->b_freeze_lock);
Expand Down Expand Up @@ -1477,7 +1477,7 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))

if (HDR_L2_WRITING(hdr)) {
l2arc_data_free_t *df;
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
df->l2df_data = buf->b_data;
df->l2df_size = hdr->b_size;
df->l2df_func = free_func;
Expand Down Expand Up @@ -3142,7 +3142,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
arc_callback_t *acb = NULL;

acb = kmem_zalloc(sizeof (arc_callback_t),
KM_PUSHPAGE);
KM_SLEEP);
acb->acb_done = done;
acb->acb_private = private;
if (pio != NULL)
Expand Down Expand Up @@ -3284,7 +3284,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,

ASSERT(!GHOST_STATE(hdr->b_state));

acb = kmem_zalloc(sizeof (arc_callback_t), KM_PUSHPAGE);
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
acb->acb_done = done;
acb->acb_private = private;

Expand Down Expand Up @@ -3341,7 +3341,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
atomic_inc_32(&hdr->b_l2hdr->b_hits);

cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_PUSHPAGE);
KM_SLEEP);
cb->l2rcb_buf = buf;
cb->l2rcb_spa = spa;
cb->l2rcb_bp = *bp;
Expand Down Expand Up @@ -3871,7 +3871,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
hdr->b_flags |= ARC_L2CACHE;
if (l2arc_compress)
hdr->b_flags |= ARC_L2COMPRESS;
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_PUSHPAGE);
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
callback->awcb_physdone = physdone;
callback->awcb_done = done;
Expand Down Expand Up @@ -4979,7 +4979,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
list_insert_head(dev->l2ad_buflist, head);

cb = kmem_alloc(sizeof (l2arc_write_callback_t),
KM_PUSHPAGE);
KM_SLEEP);
cb->l2wcb_dev = dev;
cb->l2wcb_head = head;
pio = zio_root(spa, l2arc_write_done, cb,
Expand All @@ -4989,7 +4989,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/*
* Create and add a new L2ARC header.
*/
l2hdr = kmem_cache_alloc(l2arc_hdr_cache, KM_PUSHPAGE);
l2hdr = kmem_cache_alloc(l2arc_hdr_cache, KM_SLEEP);
l2hdr->b_dev = dev;
l2hdr->b_daddr = 0;
arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/bplist.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ bplist_destroy(bplist_t *bpl)
void
bplist_append(bplist_t *bpl, const blkptr_t *bp)
{
bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_PUSHPAGE);
bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_SLEEP);

mutex_enter(&bpl->bpl_lock);
bpe->bpe_blk = *bp;
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/bptree.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
bt = db->db_data;

bte = kmem_zalloc(sizeof (*bte), KM_PUSHPAGE);
bte = kmem_zalloc(sizeof (*bte), KM_SLEEP);
bte->be_birth_txg = birth_txg;
bte->be_bp = *bp;
dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx);
Expand Down
10 changes: 5 additions & 5 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ dbuf_init(void)
* Large allocations which do not require contiguous pages
* should be using vmem_alloc() in the linux kernel
*/
h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE);
h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP);
#else
h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
#endif
Expand Down Expand Up @@ -1121,7 +1121,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn->dn_dirtyctx =
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
ASSERT(dn->dn_dirtyctx_firstset == NULL);
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE);
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
}
mutex_exit(&dn->dn_mtx);

Expand Down Expand Up @@ -1198,7 +1198,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* to make a copy of it so that the changes we make in this
* transaction group won't leak out when we sync the older txg.
*/
dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE);
dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
list_link_init(&dr->dr_dirty_node);
if (db->db_level == 0) {
void *data_old = db->db_buf;
Expand Down Expand Up @@ -1764,7 +1764,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(dn->dn_type != DMU_OT_NONE);

db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE);
db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);

db->db_objset = os;
db->db.db_object = dn->dn_object;
Expand Down Expand Up @@ -2059,7 +2059,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
int error;

dh = kmem_zalloc(sizeof (struct dbuf_hold_impl_data) *
DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE);
DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
__dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);

error = __dbuf_hold_impl(dh);
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/ddt.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
{
ddt_histogram_t *ddh_total;

ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE);
ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
ddt_get_dedup_histogram(spa, ddh_total);
ddt_histogram_stat(dds_total, ddh_total);
kmem_free(ddh_total, sizeof (ddt_histogram_t));
Expand Down Expand Up @@ -685,7 +685,7 @@ ddt_alloc(const ddt_key_t *ddk)
{
ddt_entry_t *dde;

dde = kmem_cache_alloc(ddt_entry_cache, KM_PUSHPAGE);
dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);
bzero(dde, sizeof (ddt_entry_t));
cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL);

Expand Down Expand Up @@ -834,7 +834,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
{
ddt_t *ddt;

ddt = kmem_cache_alloc(ddt_cache, KM_PUSHPAGE | KM_NODEBUG);
ddt = kmem_cache_alloc(ddt_cache, KM_SLEEP);
bzero(ddt, sizeof (ddt_t));

mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL);
Expand Down Expand Up @@ -937,7 +937,7 @@ ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp)
return (B_TRUE);

ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)];
dde = kmem_cache_alloc(ddt_entry_cache, KM_PUSHPAGE);
dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP);

ddt_key_fill(&(dde->dde_key), bp);

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/ddt_zap.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
uint64_t one, csize;
int error;

cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_PUSHPAGE);
cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP);

error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, &one, &csize);
Expand Down
Loading

0 comments on commit 6e9710f

Please sign in to comment.