Skip to content

Commit

Permalink
spa_load_verify() may consume too much memory
Browse files Browse the repository at this point in the history
When a pool is imported it will scan the pool to verify the integrity 
of the data and metadata. The amount it scans will depend on the 
import flags provided. On systems with small amounts of memory or 
when importing a pool from the crash kernel, it's possible for 
spa_load_verify to issue too many I/Os that it consumes all the memory 
of the system resulting in an OOM message or a hang.

To prevent this, we limit the amount of memory that the initial pool
scan can consume. This change will, by default, use 1/16th of the ARC
for scan I/Os to prevent running the system out of memory during import.

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Signed-off-by: George Wilson george.wilson@delphix.com
External-issue: DLPX-65237
External-issue: DLPX-65238
Closes #9146
  • Loading branch information
grwilson authored and behlendorf committed Aug 13, 2019
1 parent a43570c commit c8242a9
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 21 deletions.
15 changes: 8 additions & 7 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

uint64_t *zopt_object = NULL;
static unsigned zopt_objects = 0;
uint64_t max_inflight = 1000;
uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
static int leaked_objects = 0;
static range_tree_t *mos_refd_objs;

Expand Down Expand Up @@ -3806,7 +3806,7 @@ zdb_blkptr_done(zio_t *zio)
abd_free(zio->io_abd);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_load_verify_ios--;
spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
cv_broadcast(&spa->spa_scrub_io_cv);

if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
Expand Down Expand Up @@ -3877,9 +3877,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
flags |= ZIO_FLAG_SPECULATIVE;

mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_load_verify_ios > max_inflight)
while (spa->spa_load_verify_bytes > max_inflight_bytes)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_load_verify_ios++;
spa->spa_load_verify_bytes += size;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(NULL, spa, bp, abd, size,
Expand Down Expand Up @@ -4895,6 +4895,7 @@ dump_block_stats(spa_t *spa)
ZIO_FLAG_GODFATHER);
}
}
ASSERT0(spa->spa_load_verify_bytes);

/*
* Done after zio_wait() since zcb_haderrors is modified in
Expand Down Expand Up @@ -6681,10 +6682,10 @@ main(int argc, char **argv)
break;
/* NB: Sort single match options below. */
case 'I':
max_inflight = strtoull(optarg, NULL, 0);
if (max_inflight == 0) {
max_inflight_bytes = strtoull(optarg, NULL, 0);
if (max_inflight_bytes == 0) {
(void) fprintf(stderr, "maximum number "
"of inflight I/Os must be greater "
"of inflight bytes must be greater "
"than 0\n");
usage();
}
Expand Down
4 changes: 3 additions & 1 deletion include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ struct spa {
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
uint64_t spa_scrub_inflight; /* in-flight scrub bytes */
uint64_t spa_load_verify_ios; /* in-flight verification IOs */

/* in-flight verification bytes */
uint64_t spa_load_verify_bytes;
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
Expand Down
8 changes: 4 additions & 4 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -543,13 +543,13 @@ Default value: \fB1\fR.
.sp
.ne 2
.na
\fBspa_load_verify_maxinflight\fR (int)
\fBspa_load_verify_shift\fR (int)
.ad
.RS 12n
Maximum concurrent I/Os during the traversal performed during an "extreme
rewind" (\fB-X\fR) pool import.
Sets the maximum number of bytes to consume during pool import to the log2
fraction of the target arc size.
.sp
Default value: \fB10000\fR.
Default value: \fB4\fR.
.RE

.sp
Expand Down
24 changes: 15 additions & 9 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#include <sys/fm/util.h>
#include <sys/callb.h>
#include <sys/zone.h>
#include <sys/vmsystm.h>
#endif /* _KERNEL */

#include "zfs_prop.h"
Expand Down Expand Up @@ -2197,16 +2198,16 @@ spa_load_verify_done(zio_t *zio)
}

mutex_enter(&spa->spa_scrub_lock);
spa->spa_load_verify_ios--;
spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
cv_broadcast(&spa->spa_scrub_io_cv);
mutex_exit(&spa->spa_scrub_lock);
}

/*
* Maximum number of concurrent scrub i/os to create while verifying
* a pool while importing it.
* Maximum number of inflight bytes is the log2 faction of the arc size.
* By default, we set it to 1/16th of the arc.
*/
int spa_load_verify_maxinflight = 10000;
int spa_load_verify_shift = 4;
int spa_load_verify_metadata = B_TRUE;
int spa_load_verify_data = B_TRUE;

Expand All @@ -2228,13 +2229,14 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
return (0);

int maxinflight_bytes = arc_target_bytes() >> spa_load_verify_shift;
zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);

mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_load_verify_ios >= spa_load_verify_maxinflight)
while (spa->spa_load_verify_bytes >= maxinflight_bytes)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_load_verify_ios++;
spa->spa_load_verify_bytes += size;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size,
Expand Down Expand Up @@ -2287,12 +2289,14 @@ spa_load_verify(spa_t *spa)
"spa_load_verify_metadata=%u)",
spa_load_verify_data, spa_load_verify_metadata);
}

error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
}

(void) zio_wait(rio);
ASSERT0(spa->spa_load_verify_bytes);

spa->spa_load_meta_errors = sle.sle_meta_count;
spa->spa_load_data_errors = sle.sle_data_count;
Expand Down Expand Up @@ -9309,9 +9313,11 @@ EXPORT_SYMBOL(spa_event_notify);
#endif

#if defined(_KERNEL)
module_param(spa_load_verify_maxinflight, int, 0644);
MODULE_PARM_DESC(spa_load_verify_maxinflight,
"Max concurrent traversal I/Os while verifying pool during import -X");
/* BEGIN CSTYLED */
module_param(spa_load_verify_shift, int, 0644);
MODULE_PARM_DESC(spa_load_verify_shift, "log2(fraction of arc that can "
"be used by inflight I/Os when verifying pool during import");
/* END CSTYLED */

module_param(spa_load_verify_metadata, int, 0644);
MODULE_PARM_DESC(spa_load_verify_metadata,
Expand Down

0 comments on commit c8242a9

Please sign in to comment.