Skip to content

Commit

Permalink
Add visibility in to arc_read
Browse files Browse the repository at this point in the history
This change is an attempt to add visibility into the arc_read calls
occurring on a system, in real time. To do this, a list was added to the
in memory SPA data structure for a pool, with each element on the list
corresponding to a call to arc_read. These entries are then exported
through the kstat interface, which can then be interpreted in userspace.

For each arc_read call, the following information is exported:

 * A unique identifier (uint64_t)
 * The time the entry was added to the list (hrtime_t)
   (*not* wall clock time; relative to the other entries on the list)
 * The objset ID (uint64_t)
 * The object number (uint64_t)
 * The indirection level (uint64_t)
 * The block ID (uint64_t)
 * The name of the function originating the arc_read call (char[24])
 * The arc_flags from the arc_read call (uint32_t)
 * The PID of the reading thread (pid_t)
 * The command or name of thread originating read (char[16])

From this exported information one can see, in real time, exactly what
is being read, what function is generating the read, and whether or not
the read was found to be already cached.

There is still some work to be done, but this should serve as a good
starting point.

Specifically, dbuf_read's are not accounted for in the currently
exported information. Thus, a follow up patch should probably be added
to export these calls that never call into arc_read (they only hit the
dbuf hash table). In addition, it might be nice to create a utility
similar to "arcstat.py" to digest the exported information and display
it in a more readable format. Or perhaps, log the information and allow
for it to be "replayed" at a later time.

Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
Prakash Surya authored and behlendorf committed Oct 25, 2013
1 parent 76463d4 commit 1421c89
Show file tree
Hide file tree
Showing 11 changed files with 310 additions and 13 deletions.
21 changes: 21 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark zbookmark_t;

struct dsl_pool;
struct dsl_dataset;

Expand Down Expand Up @@ -534,6 +536,25 @@ extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_ALL ((1 << SCL_LOCKS) - 1)
#define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO)

/* Historical pool statistics */
typedef struct spa_stats_history {
kmutex_t lock;
uint64_t count;
uint64_t size;
kstat_t *kstat;
void *private;
list_t list;
} spa_stats_history_t;

typedef struct spa_stats {
spa_stats_history_t read_history;
} spa_stats_t;

extern void spa_stats_init(spa_t *spa);
extern void spa_stats_destroy(spa_t *spa);
extern void spa_read_history_add(spa_t *spa, const zbookmark_t *zb,
uint32_t aflags);

/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ struct spa {
uint64_t spa_deadman_calls; /* number of deadman calls */
uint64_t spa_sync_starttime; /* starting time fo spa_sync */
uint64_t spa_deadman_synctime; /* deadman expiration timer */
spa_stats_t spa_stats; /* assorted spa statistics */

/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
Expand Down
5 changes: 5 additions & 0 deletions include/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ typedef struct kthread {
} kthread_t;

#define curthread zk_thread_current()
#define getcomm() "unknown"
#define thread_exit zk_thread_exit
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
zk_thread_create(stk, stksize, (thread_func_t)func, arg, \
Expand Down Expand Up @@ -347,6 +348,10 @@ extern kstat_t *kstat_create(char *, int,
char *, char *, uchar_t, ulong_t, uchar_t);
extern void kstat_install(kstat_t *);
extern void kstat_delete(kstat_t *);
extern void kstat_set_raw_ops(kstat_t *ksp,
int (*headers)(char *buf, size_t size),
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index));

/*
* Kernel memory
Expand Down
6 changes: 4 additions & 2 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,19 +256,21 @@ extern char *zio_type_name[ZIO_TYPES];
* Therefore it must not change size or alignment between 32/64 bit
* compilation options.
*/
typedef struct zbookmark {
struct zbookmark {
uint64_t zb_objset;
uint64_t zb_object;
int64_t zb_level;
uint64_t zb_blkid;
} zbookmark_t;
char * zb_func;
};

#define SET_BOOKMARK(zb, objset, object, level, blkid) \
{ \
(zb)->zb_objset = objset; \
(zb)->zb_object = object; \
(zb)->zb_level = level; \
(zb)->zb_blkid = blkid; \
(zb)->zb_func = FTAG; \
}

#define ZB_DESTROYED_OBJSET (-1ULL)
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/spa_errlog.c \
$(top_srcdir)/module/zfs/spa_history.c \
$(top_srcdir)/module/zfs/spa_misc.c \
$(top_srcdir)/module/zfs/spa_stats.c \
$(top_srcdir)/module/zfs/space_map.c \
$(top_srcdir)/module/zfs/txg.c \
$(top_srcdir)/module/zfs/uberblock.c \
Expand Down
8 changes: 8 additions & 0 deletions lib/libzpool/kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,14 @@ void
kstat_delete(kstat_t *ksp)
{}

/*ARGSUSED*/
void
kstat_set_raw_ops(kstat_t *ksp,
int (*headers)(char *buf, size_t size),
int (*data)(char *buf, size_t size, void *data),
void *(*addr)(kstat_t *ksp, loff_t index))
{}

/*
* =========================================================================
* mutexes
Expand Down
1 change: 1 addition & 0 deletions module/zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/spa_config.o
$(MODULE)-objs += @top_srcdir@/module/zfs/spa_errlog.o
$(MODULE)-objs += @top_srcdir@/module/zfs/spa_history.o
$(MODULE)-objs += @top_srcdir@/module/zfs/spa_misc.o
$(MODULE)-objs += @top_srcdir@/module/zfs/spa_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o
$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o
$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o
Expand Down
20 changes: 13 additions & 7 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2943,6 +2943,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
kmutex_t *hash_lock;
zio_t *rzio;
uint64_t guid = spa_load_guid(spa);
int rc = 0;

top:
hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
Expand Down Expand Up @@ -2976,10 +2977,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
hdr->b_acb = acb;
add_reference(hdr, hash_lock, private);
mutex_exit(hash_lock);
return (0);
goto out;
}
mutex_exit(hash_lock);
return (0);
goto out;
}

ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
Expand Down Expand Up @@ -3174,12 +3175,12 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,

if (*arc_flags & ARC_NOWAIT) {
zio_nowait(rzio);
return (0);
goto out;
}

ASSERT(*arc_flags & ARC_WAIT);
if (zio_wait(rzio) == 0)
return (0);
goto out;

/* l2arc read error; goto zio_read() */
} else {
Expand All @@ -3203,13 +3204,18 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
rzio = zio_read(pio, spa, bp, buf->b_data, size,
arc_read_done, buf, priority, zio_flags, zb);

if (*arc_flags & ARC_WAIT)
return (zio_wait(rzio));
if (*arc_flags & ARC_WAIT) {
rc = zio_wait(rzio);
goto out;
}

ASSERT(*arc_flags & ARC_NOWAIT);
zio_nowait(rzio);
}
return (0);

out:
spa_read_history_add(spa, zb, *arc_flags);
return (rc);
}

arc_prune_t *
Expand Down
7 changes: 4 additions & 3 deletions module/zfs/dmu_traverse.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,9 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);

SET_BOOKMARK(czb, td->td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
uint32_t flags = ARC_WAIT;
Expand All @@ -525,7 +528,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,

err = arc_read(NULL, td->td_spa, rootbp,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, czb);
if (err != 0)
return (err);

Expand All @@ -539,8 +542,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
td, TQ_NOQUEUE))
pd->pd_exited = B_TRUE;

SET_BOOKMARK(czb, td->td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
err = traverse_visitbp(td, NULL, rootbp, czb);

mutex_enter(&pd->pd_mtx);
Expand Down
4 changes: 3 additions & 1 deletion module/zfs/spa_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include <sys/metaslab_impl.h>
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/kstat.h>
#include "zfs_prop.h"
#include "zfeature_common.h"

Expand Down Expand Up @@ -253,7 +254,6 @@ unsigned long zfs_deadman_synctime = 1000ULL;
*/
int zfs_deadman_enabled = 1;


/*
* ==========================================================================
* SPA config locking
Expand Down Expand Up @@ -495,6 +495,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)

refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
spa_stats_init(spa);

avl_add(&spa_namespace_avl, spa);

Expand Down Expand Up @@ -580,6 +581,7 @@ spa_remove(spa_t *spa)

refcount_destroy(&spa->spa_refcount);

spa_stats_destroy(spa);
spa_config_lock_destroy(spa);

for (t = 0; t < TXG_SIZE; t++)
Expand Down
Loading

0 comments on commit 1421c89

Please sign in to comment.